add docker

This commit is contained in:
Fabrice Lamant
2025-12-06 14:55:13 +01:00
parent 3015fc2e78
commit fef4690b0b
24 changed files with 1186 additions and 446 deletions

View File

0
cache/.gitkeep vendored
View File

21
docker-compose.yaml Normal file
View File

@ -0,0 +1,21 @@
services:
scraper:
container_name: scraper
build: ./scraper
restart: unless-stopped
volumes:
- cache:/app/cache
- data:/app/output
ui:
container_name: ui
build: ./ui
ports:
- "3000:3000"
restart: unless-stopped
volumes:
- data:/app/data
volumes:
cache:
data:

2
scraper/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
cache/
output/

16
scraper/Dockerfile Normal file
View File

@ -0,0 +1,16 @@
FROM node:lts-alpine AS base
WORKDIR /app
COPY package*.json ./
RUN npm install
FROM base AS build
WORKDIR /app
COPY . .
RUN npm run build
FROM node:lts-alpine AS runner
WORKDIR /app
COPY --from=base /app/node_modules ./node_modules
COPY --from=build /app/dist .
ENV DEBUG=olympics-calendar:*
CMD ["node", "index.js"]

View File

@ -1,30 +1,32 @@
import Debug from "debug";
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs"
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
const debug = Debug(`olympics-calendar:cache`);
export class Cache {
private debug = Debug("olympics-calendar:cache");
const cachePath = (key: string): string => {
return `../cache/${key}.cached`;
}
private cachePath = (key: string): string => {
return `./cache/${key}.cached`;
};
export const get = (key: string): string | null => {
debug(`get: key=${key}`);
const path = cachePath(key);
if (existsSync(path)) {
return readFileSync(path, "utf-8");
public get(key: string): string | null {
this.debug("get", key);
const path = this.cachePath(key);
if (existsSync(path)) {
return readFileSync(path, "utf-8");
}
return null;
}
return null;
}
export const has = (key: string): boolean => {
debug(`has: key=${key}`);
const path = cachePath(key);
return existsSync(path);
}
public has(key: string): boolean {
this.debug(`has: key=${key}`);
const path = this.cachePath(key);
return existsSync(path);
}
export const set = (key: string, data: string): void => {
debug(`set: key=${key}`);
const path = cachePath(key);
mkdirSync(path.split("/").slice(0, -1).join("/"), { recursive: true });
writeFileSync(path, data);
public set(key: string, data: string): void {
this.debug(`set: key=${key}`);
const path = this.cachePath(key);
mkdirSync(path.split("/").slice(0, -1).join("/"), { recursive: true });
writeFileSync(path, data);
}
}

View File

@ -1,26 +1,76 @@
import globals from "globals";
import pluginJs from "@eslint/js";
import eslint from "@eslint/js";
import perfectionist from "eslint-plugin-perfectionist";
import { defineConfig, globalIgnores } from "eslint/config";
import tseslint from "typescript-eslint";
export default [
export default defineConfig(
globalIgnores(["./dist/**", "./node_modules/**"]),
eslint.configs.recommended,
tseslint.configs.recommended,
{
files: ["**/*.js"],
languageOptions: {
sourceType: "commonjs",
plugins: {
perfectionist,
},
},
{
languageOptions: {
globals: globals.node,
},
},
pluginJs.configs.recommended,
{
rules: {
"comma-dangle": ["error", "always-multiline"],
"@typescript-eslint/no-explicit-any": "off",
"@typescript-eslint/no-unsafe-function-type": "off",
"@typescript-eslint/no-unused-vars": ["error", { caughtErrors: "none" }],
"no-case-declarations": "off",
"comma-dangle": ["error", "only-multiline"],
complexity: ["error", 15],
quotes: ["error", "double"],
semi: ["error", "always"],
"perfectionist/sort-imports": [
"error",
{ order: "asc", type: "natural" },
],
"perfectionist/sort-classes": [
"error",
{
groups: [
"index-signature",
["private-static-property", "private-static-accessor-property"],
["protected-static-property", "protected-static-accessor-property"],
["static-property", "static-accessor-property"],
["private-property", "private-accessor-property"],
["protected-property", "protected-accessor-property"],
["property", "accessor-property"],
"constructor",
["private-static-get-method", "private-static-set-method"],
["protected-static-get-method", "protected-static-set-method"],
["static-get-method", "static-set-method"],
["private-get-method", "private-set-method"],
["protected-get-method", "protected-set-method"],
["get-method", "set-method"],
["private-static-method", "private-static-function-property"],
["protected-static-method", "protected-static-function-property"],
["static-method", "static-function-property"],
["private-method", "private-function-property"],
["protected-method", "protected-function-property"],
["method", "function-property"],
"static-block",
"unknown",
],
ignoreCase: true,
order: "asc",
type: "alphabetical",
},
],
"perfectionist/sort-named-exports": [
"error",
{ order: "asc", type: "natural" },
],
"no-console": "off",
},
},
];
);

120
scraper/ics-generator.ts Normal file
View File

@ -0,0 +1,120 @@
import Debug from "debug";
import { mkdirSync, writeFileSync } from "fs";
import { getFlag } from "./nocs";
import { Calendar } from "./types";
/**
 * Writes iCalendar (.ics) feeds for a scraped calendar.
 *
 * For every calendar language, one file is produced per combination of
 * "all sports / one sport" and "all NOCs / one NOC", at
 * `./output/<lang>/<sport|all-sports>/<noc|calendar>.ics`.
 */
export class ICSGenerator {
  private calendar: Calendar;
  private debug = Debug("olympics-calendar:ics-generator");

  constructor(calendar: Calendar) {
    this.calendar = calendar;
  }

  /**
   * Builds the content lines of a single VEVENT.
   *
   * @param event - Event to serialise.
   * @param langCode - Language used to pick the translated names.
   * @param sportNames - Sport key to display name, already resolved for `langCode`.
   * @returns The lines from `BEGIN:VEVENT` to `END:VEVENT`, without line terminators.
   */
  private static buildEventLines(
    event: Calendar["events"][number],
    langCode: string,
    sportNames: Map<string, string>,
  ): string[] {
    const eventName = event.name[langCode] || "";
    const sportName = sportNames.get(event.sport) ?? event.sport;
    const start = ICSGenerator.toICSDateTime(event.start);
    const end = ICSGenerator.toICSDateTime(event.end);

    // A VEVENT may contain SUMMARY at most once: when both teams are known the
    // matchup replaces the generic event name instead of being added next to it.
    let summary = eventName;
    if (event.match) {
      const { team1, team2 } = event.match;
      const team1Name = team1.name[langCode] || team1.key;
      const team2Name = team2.name[langCode] || team2.key;
      if (team1Name && team2Name) {
        summary = `${getFlag(team1.key)} ${team1Name} - ${team2Name} ${getFlag(team2.key)}`;
      }
    }

    return [
      "BEGIN:VEVENT",
      `UID:${event.key.replace(/--/g, "-")}`,
      // DTSTAMP reuses the start time, so regenerating a feed yields identical output.
      `DTSTAMP:${start}`,
      `DTSTART:${start}`,
      `DTEND:${end}`,
      `LOCATION:${ICSGenerator.escapeText(event.location[langCode] || "")}`,
      `DESCRIPTION:${ICSGenerator.escapeText(`${sportName} - ${eventName}`)}`,
      `SUMMARY:${ICSGenerator.escapeText(summary)}`,
      "END:VEVENT",
    ];
  }

  /**
   * Escapes a value for use in an iCalendar TEXT property: backslash,
   * semicolon and comma are backslash-escaped, line breaks become `\n`.
   */
  private static escapeText(value: string): string {
    return value
      .replace(/\\/g, "\\\\")
      .replace(/;/g, "\\;")
      .replace(/,/g, "\\,")
      .replace(/\r?\n/g, "\\n");
  }

  /**
   * Tells whether an event belongs to the feed selected by the given filter.
   * A NOC filter only ever matches events that have a `match` involving that NOC.
   */
  private static matchesFilter(
    event: Calendar["events"][number],
    sportKey: string | null,
    nocKey: string | null,
  ): boolean {
    if (sportKey && event.sport !== sportKey) return false;
    if (!nocKey) return true;
    if (!event.match) return false;
    return event.match.team1.key === nocKey || event.match.team2.key === nocKey;
  }

  /**
   * Strips dashes, colons and a trailing fractional-seconds part from a
   * timestamp, e.g. `2026-02-06T19:00:00.000Z` becomes `20260206T190000Z`.
   */
  private static toICSDateTime(timestamp: string): string {
    return timestamp.replace(/[-:]/g, "").replace(/\.\d+Z$/, "Z");
  }

  /**
   * Writes the feed for one sport/NOC combination, once per calendar language.
   *
   * @param sportKey - Sport to restrict the feed to, or `null` for all sports.
   * @param nocKey - NOC to restrict the feed to, or `null` for all NOCs.
   */
  private generateICSFile(
    sportKey: string | null,
    nocKey: string | null,
  ): void {
    this.debug(
      "generateICSFile",
      sportKey || "all-sports",
      nocKey || "all-nocs",
    );

    const pathSportKey = sportKey || "all-sports";
    const pathNocKey = nocKey || "calendar";

    // The selection does not depend on the language, so it is computed once.
    const selectedEvents = this.calendar.events.filter((event) =>
      ICSGenerator.matchesFilter(event, sportKey, nocKey),
    );

    for (const lang of this.calendar.languages) {
      const directory = `./output/${lang.code.toLowerCase()}/${pathSportKey.toLowerCase()}`;
      mkdirSync(directory, { recursive: true });

      // Resolve sport display names once per language; a missing translation
      // falls back to the sport key rather than printing "undefined".
      const sportNames = new Map(
        this.calendar.sports.map((sport): [string, string] => [
          sport.key,
          sport.name[lang.code] ?? sport.key,
        ]),
      );

      const titleComponents: string[] = [];
      if (nocKey) {
        const noc = this.calendar.nocs.find((n) => n.key === nocKey);
        titleComponents.push(noc?.name[lang.code] ?? nocKey);
      }
      if (sportKey) {
        titleComponents.push(sportNames.get(sportKey) ?? sportKey);
      }
      titleComponents.push("Milano Cortina 2026");
      const title = titleComponents.join(" - ");

      const lines = [
        "BEGIN:VCALENDAR",
        "VERSION:2.0",
        `PRODID:-//fabrice404//olympics-calendar//${lang.code}/${pathSportKey}/${pathNocKey}`,
        `X-WR-CALNAME:${title}`,
        `NAME:${title}`,
        ...selectedEvents.flatMap((event) =>
          ICSGenerator.buildEventLines(event, lang.code, sportNames),
        ),
        "END:VCALENDAR",
      ];

      // iCalendar content lines are terminated by CRLF, the last one included.
      writeFileSync(
        `${directory}/${pathNocKey.toLowerCase()}.ics`,
        `${lines.join("\r\n")}\r\n`,
      );
    }
  }

  /**
   * Generates every feed: the global one, one per sport, one per sport and
   * NOC pair, and one per NOC across all sports.
   */
  public generate(): void {
    this.debug("generate");
    this.generateICSFile(null, null);
    for (const sport of this.calendar.sports) {
      this.generateICSFile(sport.key, null);
      for (const noc of this.calendar.nocs) {
        this.generateICSFile(sport.key, noc.key);
      }
    }
    for (const noc of this.calendar.nocs) {
      this.generateICSFile(null, noc.key);
    }
  }
}

View File

@ -1,150 +0,0 @@
// BEGIN:VCALENDAR
// VERSION:2.0
// PRODID:-//fabrice404//olympics-calendar//archery/AUS//EN
// X-WR-CALNAME:🇦🇺 Australia Archery | Paris 2024
// NAME:🇦🇺 Australia Archery | Paris 2024
// BEGIN:VEVENT
// UID:20240725T073000Z-archery-WOMENS-INDIVIDUAL-RANKING-ROUND
// DTSTAMP:20240725T073000Z
// DTSTART:20240725T073000Z
// DTEND:20240725T103000Z
// DESCRIPTION:Archery - Women's Individual Ranking Round\n🇨🇳 AN
// Qixuan\n🇲🇽 Alejandra VALENCIA\n🇲🇩 Alexandra MIRCA\n🇵🇷 Alondra
// RIVERA\n🇫🇷 Amelie CORDEAU\n🇧🇷 Ana Luiza SLIACHTICAS CAETANO\n🇨🇴 Ana
// RENDON MARTINEZ\n🇲🇽 Ana VAZQUEZ\n🇲🇽 Angela RUIZ\n🇮🇳 Ankita
// BHAKAT\n🇲🇾 Ariana Nur Dania MOHAMAD ZAIRI\n🇮🇳 Bhajan KAUR\n🇬🇧
// Bryony PITMAN\n🇹🇼 CHIU Yi-Ching\n🇫🇷 Caroline LOPEZ\n🇺🇸 Casey
// KAUFHOLD\n🇺🇸 Catalina GNORIEGA\n🇩🇪 Charline SCHWARZ\n🇮🇹 Chiara
// REBAGLIATI\n🇮🇳 Deepika KUMARI\n🇸🇰 Denisa BARANKOVA\n🇮🇩 Diananda
// CHOIRUNISA\n🇪🇸 Elia CANALES\n🇹🇷 Elif Berra GOKKIR\n🇦🇹 Elisabeth
// STRAKA\n🇬🇳 Fatoumata SYLLA\n🇳🇱 Gaby SCHLOESSER\n🇸🇲 Giorgia
// CESARINI\n🇰🇷 JEON Hunyoung\n🇪🇬 Jana ALI\n🇺🇸 Jennifer MUCINO\n🇩🇪
// Katharina BAUER\n🇩🇰 Kirstine DANSTRUP ANDERSEN\n🇹🇼 LEI
// Chien-Ying\n🇨🇳 LI Jiaman\n🇹🇼 LI Tsai-Chi\n🇰🇷 LIM Sihyeon\n🇦🇺
// Laura PAEGLIS\n🇳🇱 Laura van der WINKEL\n🇫🇷 Lisa BARBELIN\n🇷🇴
// Madalina AMAISTROAIE\n🇨🇿 Marie HORACKOVA\n🇬🇧 Megs HAVERS\n🇩🇪
// Michelle KROPPEN\n🇮🇱 Mikaella MOSHE\n🇮🇷 Mobina FALLAH\n🇰🇷 NAM
// Suhyeon\n🇯🇵 NODA Satsuki\n🇲🇾 Nurul Azreena MOHAMAD FAZIL\n🇬🇧 Penny
// HEALEY\n🇳🇱 Quinty ROEFFEN\n🇪🇪 Reena PARNAT\n🇮🇩 Rezza OCTAVIA\n🇹🇳
// Rihab ELWALID\n🇲🇾 Syaqiera MASHAYIKH\n🇮🇩 Syifa Nurafifah KAMAL\n🇻🇳
// Thi Anh Nguyet DO\n🇺🇦 Veronika MARCHENKO\n🇨🇦 Virginie CHENIER\n🇵🇱
// Wioleta MYSZOR\n🇨🇳 YANG Xiaolei\n🇦🇿 Yaylagul RAMAZANOVA\n🇸🇮 Zana
// PINTARIC\n🇺🇿 Ziyodakhon ABDUSATTOROVA
// SUMMARY:🏹 Women's Individual Ranking Round
// LOCATION:Invalides
// END:VEVENT
import { mkdirSync, writeFileSync } from "fs";
import { Calendar } from "./types";
import { getFlag } from "./nocs";
// BEGIN:VCALENDAR
// VERSION:2.0
// PRODID:-//fabrice404//olympics-calendar//3x3-basketball/AUS//EN
// X-WR-CALNAME:🇦🇺 Australia 3x3 Basketball | Paris 2024
// NAME:🇦🇺 Australia 3x3 Basketball | Paris 2024
// BEGIN:VEVENT
// UID:20240730T160000Z-3x3-basketball-WOMENS-POOL-ROUND-AUS-CAN
// DTSTAMP:20240730T160000Z
// DTSTART:20240730T160000Z
// DTEND:20240730T162500Z
// DESCRIPTION:3x3 Basketball - Women's Pool Round
// SUMMARY:🏀 AUS 🇦🇺 - 🇨🇦 CAN
// LOCATION:La Concorde 1
// END:VEVENT
// END:VCALENDAR
/**
 * Generates every ICS feed for the calendar: the global feed first, then for
 * each sport its own feed followed by one feed per NOC within that sport, and
 * finally one feed per NOC across all sports.
 *
 * @param calendar - Scraped calendar whose sports and NOCs drive the feed list.
 */
export const generateICSFiles = (calendar: Calendar): void => {
  generateICSFile(calendar, null, null);

  for (const { key: sportKey } of calendar.sports) {
    generateICSFile(calendar, sportKey, null);
    for (const { key: nocKey } of calendar.nocs) {
      generateICSFile(calendar, sportKey, nocKey);
    }
  }

  for (const { key: nocKey } of calendar.nocs) {
    generateICSFile(calendar, null, nocKey);
  }
};
/**
 * Writes one .ics file per calendar language for the given sport/NOC filter,
 * at `../ui/public/data/<lang>/<sport|all-sports>/<noc|calendar>.ics`.
 *
 * @param calendar - Scraped calendar providing languages, sports, NOCs and events.
 * @param sportKey - Sport to restrict the feed to, or `null` for all sports.
 * @param nocKey - NOC to restrict the feed to, or `null` for all NOCs. With a
 *   NOC filter, only events that have a `match` involving that NOC are kept.
 */
export const generateICSFile = (calendar: Calendar, sportKey: string | null, nocKey: string | null): void => {
  calendar.languages.forEach((lang) => {
    const pathSportKey = sportKey ? sportKey : "all-sports";
    const pathNocKey = nocKey ? nocKey : "calendar";

    const filepath = `../ui/public/data/${lang.code.toLowerCase()}/${pathSportKey.toLowerCase()}/${pathNocKey.toLowerCase()}.ics`;
    // Create the parent directory of the target file.
    mkdirSync(filepath.split("/").slice(0, -1).join("/"), { recursive: true });

    const titleComponents = [];
    if (nocKey) {
      titleComponents.push(`${calendar.nocs.find((n) => n.key === nocKey)!.name[lang.code]}`);
    }
    if (sportKey) {
      titleComponents.push(calendar.sports.find((s) => s.key === sportKey)!.name[lang.code]);
    }
    titleComponents.push("Milano Cortina 2026");

    const title = titleComponents.join(" - ");

    const lines = [];

    lines.push("BEGIN:VCALENDAR");
    lines.push("VERSION:2.0");
    lines.push(`PRODID:-//fabrice404//olympics-calendar//${lang.code}/${pathSportKey}/${pathNocKey}`);
    lines.push(`X-WR-CALNAME:${title}`);
    lines.push(`NAME:${title}`);

    calendar.events
      .filter((event) => {
        if (sportKey && event.sport !== sportKey) return false;
        if (nocKey) {
          if (event.match) {
            const team1Key = event.match.team1.key;
            const team2Key = event.match.team2.key;
            if (team1Key !== nocKey && team2Key !== nocKey) {
              return false;
            }
          } else {
            return false;
          }
        }
        return true;
      })
      .forEach((event) => {
        lines.push("BEGIN:VEVENT");
        lines.push(`UID:${event.key.replace(/--/g, "-")}`);
        lines.push(`DTSTAMP:${event.start.replace(/[-:]/g, "").replace(/\.\d+Z$/, "Z")}`);
        lines.push(`DTSTART:${event.start.replace(/[-:]/g, "").replace(/\.\d+Z$/, "Z")}`);
        lines.push(`DTEND:${event.end.replace(/[-:]/g, "").replace(/\.\d+Z$/, "Z")}`);
        lines.push(`LOCATION:${event.location[lang.code] || ""}`);

        const sport = calendar.sports.find((s) => s.key === event.sport)!;
        lines.push(`DESCRIPTION:${sport.name[lang.code]} - ${event.name[lang.code] || ""}`);
        let summary = `SUMMARY:${event.name[lang.code] || ""}`;

        if (event.match) {
          const team1Name = event.match.team1.name[lang.code] || event.match.team1.key;
          const team1Flag = getFlag(event.match.team1.key);
          const team2Name = event.match.team2.name[lang.code] || event.match.team2.key;
          const team2Flag = getFlag(event.match.team2.key);
          if (team1Name && team2Name) {
            // A VEVENT may contain SUMMARY only once, so the matchup replaces
            // the generic event name instead of being emitted in addition to it.
            summary = `SUMMARY:${team1Flag} ${team1Name} - ${team2Name} ${team2Flag}`;
          }
        }

        lines.push(summary);
        lines.push("END:VEVENT");
      });

    lines.push("END:VCALENDAR");

    // iCalendar content lines are terminated by CRLF, the last one included.
    writeFileSync(filepath, `${lines.join("\r\n")}\r\n`);
  });
};

View File

@ -1,141 +1,12 @@
import Debug from "debug";
import nodeCron from "node-cron";
import * as cache from "./cache";
import { mkdirSync, writeFileSync } from "fs";
import { Calendar, Event, Sport, Team } from "./types";
import { generateICSFiles } from "./ics";
const baseUrl = "https://www.olympics.com";
const basePath = "/milano-cortina-2026/schedule/overview";
const debug = Debug(`olympics-calendar:index`);
const getScheduleOverview = async (language: string) => {
debug(`getScheduleOverview: language=${language}`);
const scheduleOverviewKey = `${language}/schedule-overview`;
if (!cache.has(scheduleOverviewKey)) {
debug(`Fetching ${baseUrl}/${language}${basePath}`);
const response = await fetch(`${baseUrl}/${language}/${basePath}`);
const page = await response.text();
const dataMatch = page.match(/<script id="__NEXT_DATA__" type="application\/json">([\s\S]*?)<\/script>/);
if (!dataMatch) {
throw new Error("Could not find __NEXT_DATA__ script tag");
}
const data = dataMatch[1];
cache.set(scheduleOverviewKey, JSON.stringify(JSON.parse(data), null, 2));
}
const scheduleOverview = JSON.parse(cache.get(scheduleOverviewKey)!);
return scheduleOverview;
};
const getScheduleSport = async (language: string, sportCode: string) => {
debug(`getScheduleSport: language=${language}, sportCode=${sportCode}`);
const scheduleSportKey = `${language}/${sportCode}`;
if (!cache.has(scheduleSportKey)) {
debug(`Fetching ${baseUrl}/${language}/milano-cortina-2026/schedule/${sportCode}`);
const response = await fetch(`${baseUrl}/${language}/milano-cortina-2026/schedule/${sportCode}`);
const page = await response.text();
const dataMatch = page.match(/<script id="__NEXT_DATA__" type="application\/json">([\s\S]*?)<\/script>/);
if (!dataMatch) {
return null;
debug(`No data found for sportCode=${sportCode} in language=${language}`);
}
const data = dataMatch[1];
cache.set(scheduleSportKey, JSON.stringify(JSON.parse(data), null, 2));
}
const scheduleSport = JSON.parse(cache.get(scheduleSportKey)!);
return scheduleSport;
};
import { Scraper } from "./scraper";
const main = async () => {
const overview = await getScheduleOverview("en");
const languages = overview.props.pageProps.page.template.properties.header.mainNav.languages
.filter((lang: any) => lang.link.match(/\/milano-cortina-2026\/schedule\/overview$/))
.map((lang: any) => ({
code: lang.lang,
name: lang.label,
}))
const sports: Sport[] = [];
const events: Event[] = [];
let nocs: Team[] = [];
for (const lang of languages) {
const scheduleOverview = await getScheduleOverview(lang.code);
const disciplines = scheduleOverview.props.pageProps.page.items
.find((item: any) => item.type === "module" && item.name === "scheduleGrid")
.data.disciplines;
for (const discipline of disciplines) {
const key = discipline.disciplineCode.toLowerCase();
if (key !== "cer") {
if (sports.find((s: any) => s.key === key) == null) {
sports.push({ key, name: {}, order: -1 })
}
const sport = sports.find((s: any) => s.key === key)!;
sport.name[lang.code] = discipline.description;
sport.order = discipline.order;
const scheduleSport = await getScheduleSport(lang.code, sport.key);
const scheduleList = scheduleSport.props.pageProps.page.items.find((item: any) => item.type === "module" && item.name === "scheduleList").data.schedules.map((schedule: any) => schedule.units).flat()
for (const scheduleListElement of scheduleList) {
if (events.find(e => e.key === scheduleListElement.unitCode) == null) {
events.push({
key: scheduleListElement.unitCode,
sport: sport.key,
start: scheduleListElement.startDateTimeUtc,
end: scheduleListElement.endDateTimeUtc,
isTraining: scheduleListElement.isTraining,
medal: scheduleListElement.medal,
name: {},
location: {},
})
}
const event = events.find(e => e.key === scheduleListElement.unitCode)!;
event.name[lang.code] = scheduleListElement.description;
event.location[lang.code] = scheduleListElement.venue?.description || ''
if (scheduleListElement.match) {
if (event.match == null) {
event.match = {
team1: { key: scheduleListElement.match.team1.teamCode.replace(/[^A-Z]/gi, ''), name: {} },
team2: { key: scheduleListElement.match.team2.teamCode.replace(/[^A-Z]/gi, ''), name: {} },
};
}
event.match.team1.name[lang.code] = (scheduleListElement.match.team1.description || '').replace(/\,/gi, '');
event.match.team2.name[lang.code] = (scheduleListElement.match.team2.description || '').replace(/\,/gi, '');
for (const team of [scheduleListElement.match.team1, scheduleListElement.match.team2]) {
const nocKey = team.teamCode.replace(/[^A-Z]/gi, '');
if (nocs.find(n => n.key === nocKey) == null) {
nocs.push({ key: nocKey, name: {} });
}
const noc = nocs.find(n => n.key === nocKey)!;
noc.name[lang.code] = (team.description || '').replace(/\,/gi, '');
}
}
}
}
}
}
nocs = nocs.filter((noc) => noc.key !== noc.name.en);
const dataFolder = "../ui/public/data";
mkdirSync(dataFolder, { recursive: true });
const calendar: Calendar = { languages, sports, nocs, events };
writeFileSync(`${dataFolder}/calendar.json`, JSON.stringify(calendar));
generateICSFiles(calendar);
nodeCron.schedule("* * * * *", async () => {
const scraper = new Scraper();
await scraper.scrape();
});
};
main();

View File

@ -209,4 +209,4 @@ export const flags: { [key: string]: string } = {
export const getFlag = (nocKey: string): string => {
return flags[nocKey.toUpperCase()] || "🏳️";
}
};

View File

@ -3,7 +3,7 @@
"ignore": [
"node_modules",
"cache/**",
"docs/**"
"output/**"
],
"ext": "ts,json,html,css"
"ext": "ts,json"
}

File diff suppressed because it is too large Load Diff

View File

@ -1,24 +1,28 @@
{
"name": "scraper",
"version": "1.0.0",
"description": "",
"license": "ISC",
"author": "",
"author": "Fabrice Lamant",
"type": "commonjs",
"main": "index.js",
"scripts": {
"start": "find ./cache/**/*.cached -mmin +10 -exec rm -f {} \\; | DEBUG=olympics-calendar* ts-node index.ts",
"build": "tsc --build --verbose",
"start": "DEBUG=olympics-calendar* ts-node index.ts",
"dev": "DEBUG=olympics-calendar* nodemon index.ts",
"lint": "eslint . --ext .ts"
},
"dependencies": {
"debug": "^4.4.3",
"eslint": "^9.39.1",
"node-cron": "^4.2.1",
"nodemon": "^3.1.11",
"ts-node": "^10.9.2",
"typescript": "^5.9.3"
"ts-node": "^10.9.2"
},
"devDependencies": {
"@types/debug": "^4.1.12"
"@eslint/js": "^9.39.1",
"@types/debug": "^4.1.12",
"@types/node": "^24.10.1",
"eslint": "^9.39.1",
"eslint-plugin-perfectionist": "^4.15.1",
"typescript": "^5.9.3",
"typescript-eslint": "^8.48.1"
}
}

188
scraper/scraper.ts Normal file
View File

@ -0,0 +1,188 @@
import Debug from "debug";
import { mkdirSync, writeFileSync } from "fs";
import { Cache } from "./cache";
import { ICSGenerator } from "./ics-generator";
import { Calendar, Event, Language, PageData, Sport, Team } from "./types";
const BASE_URL = "https://www.olympics.com";
const BASE_SCHEDULE_PATH = "milano-cortina-2026/schedule/overview";
/**
 * Scrapes the Milano Cortina 2026 schedule pages of olympics.com and turns
 * them into a calendar (languages, sports, NOCs, events), which is saved as
 * `./output/calendar.json` and handed to {@link ICSGenerator}.
 *
 * Page payloads are read through {@link Cache}, keyed by URL path.
 */
export class Scraper {
  private cache = new Cache();
  private debug = Debug("olympics-calendar:scraper");
  private events: Event[] = [];
  private languages: Language[] = [];
  private nocs: Team[] = [];
  private sports: Sport[] = [];

  /** Removes every comma from a team description; a missing one becomes "". */
  private static cleanTeamName(description: string | null | undefined): string {
    return (description || "").replace(/,/g, "");
  }

  /** Keeps only the ASCII letters of a team code to obtain the NOC key. */
  private static toNocKey(teamCode: string): string {
    return teamCode.replace(/[^A-Z]/gi, "");
  }

  /**
   * Returns the `__NEXT_DATA__` JSON payload of a page, from the cache when
   * present, otherwise by fetching the page and caching the payload.
   *
   * NOTE(review): a cached payload is reused for as long as its entry exists;
   * nothing here expires it. The `find ... -mmin +10` cleanup is no longer
   * part of the `start` script, so confirm how entries are meant to refresh.
   *
   * @param path - URL path (starting with `/`) appended to `BASE_URL`.
   * @throws Error when the request fails or the page carries no usable payload.
   */
  private async getPageData(path: string): Promise<PageData> {
    this.debug(`getPageData: path=${path}`);

    const cached = this.cache.get(path);
    if (cached !== null) {
      return JSON.parse(cached);
    }

    const url = `${BASE_URL}${path}`;
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(
        `Request failed with HTTP status ${response.status} for URL: ${url}`,
      );
    }

    const page = await response.text();
    const dataMatch = page.match(
      /<script id="__NEXT_DATA__" type="application\/json">([\s\S]*?)<\/script>/,
    );
    // An empty capture is as unusable as a missing tag: fail here instead of
    // returning a null payload that breaks later, far from the cause.
    const data = dataMatch?.[1];
    if (!data) {
      throw new Error(
        `Could not find __NEXT_DATA__ script tag for URL: ${url}`,
      );
    }

    const pageData: PageData = JSON.parse(data);
    this.cache.set(path, JSON.stringify(pageData, null, 2));
    return pageData;
  }

  /** Writes the current calendar to `./output/calendar.json`. */
  private saveCalendar(): void {
    this.debug("saveCalendar");
    const calendar = this.getCalendar();
    // `output/` is git-ignored, so it may not exist yet on a fresh checkout.
    mkdirSync("./output", { recursive: true });
    writeFileSync("./output/calendar.json", JSON.stringify(calendar));
  }

  /**
   * Loads the schedule page of every sport in every language and merges the
   * schedule units into `events`, collecting the NOCs of match teams on the way.
   *
   * @throws Error when a schedule page has no `scheduleList` module.
   */
  private async scrapeEvents(): Promise<void> {
    this.debug("scrapeEvents");
    for (const sport of this.sports) {
      for (const lang of this.languages) {
        const path = `/${lang.code}/milano-cortina-2026/schedule/${sport.key}`;
        const data = await this.getPageData(path);

        const scheduleModule = data.props.pageProps.page.items.find(
          (item) => item.type === "module" && item.name === "scheduleList",
        );
        if (!scheduleModule) {
          throw new Error(`Could not find scheduleList module for path: ${path}`);
        }
        const scheduleList = scheduleModule.data.schedules.flatMap(
          (schedule) => schedule.units,
        );

        for (const scheduleElement of scheduleList) {
          // Events are shared across languages: create on first sight, then
          // only add the translated name and location.
          let event = this.events.find(
            (e) => e.key === scheduleElement.unitCode,
          );
          if (!event) {
            event = {
              key: scheduleElement.unitCode,
              sport: sport.key,
              start: scheduleElement.startDateTimeUtc,
              end: scheduleElement.endDateTimeUtc,
              isTraining: scheduleElement.isTraining,
              medal: scheduleElement.medal,
              name: {},
              location: {},
            };
            this.events.push(event);
          }

          event.name[lang.code] = scheduleElement.description;
          event.location[lang.code] = scheduleElement.venue?.description || "";

          const match = scheduleElement.match;
          if (!match) {
            continue;
          }

          if (event.match == null) {
            event.match = {
              team1: { key: Scraper.toNocKey(match.team1.teamCode), name: {} },
              team2: { key: Scraper.toNocKey(match.team2.teamCode), name: {} },
            };
          }
          event.match.team1.name[lang.code] = Scraper.cleanTeamName(
            match.team1.description,
          );
          event.match.team2.name[lang.code] = Scraper.cleanTeamName(
            match.team2.description,
          );

          for (const team of [match.team1, match.team2]) {
            const nocKey = Scraper.toNocKey(team.teamCode);
            let noc = this.nocs.find((n) => n.key === nocKey);
            if (!noc) {
              noc = { key: nocKey, name: {} };
              this.nocs.push(noc);
            }
            noc.name[lang.code] = Scraper.cleanTeamName(team.description);
          }
        }
      }
    }
  }

  /**
   * Reads the language switcher of the English overview page and keeps the
   * languages whose link points at a schedule overview page.
   */
  private async scrapeLanguages(): Promise<void> {
    this.debug("scrapeLanguages");
    const pageData = await this.getPageData(`/en/${BASE_SCHEDULE_PATH}`);
    const languagesData =
      pageData.props.pageProps.page.template.properties.header.mainNav
        .languages;

    this.languages = languagesData
      .filter((lang) =>
        lang.link.match(/\/milano-cortina-2026\/schedule\/overview$/),
      )
      .map((lang) => ({
        code: lang.lang,
        name: lang.label,
      }));
  }

  /**
   * Loads the overview page of every language and merges its disciplines into
   * `sports`, one entry per lower-cased discipline code with translated names.
   *
   * @throws Error when an overview page has no `scheduleGrid` module.
   */
  private async scrapeSports(): Promise<void> {
    this.debug("scrapeSports");
    for (const lang of this.languages) {
      this.debug(`Scraping language: ${lang.code}`);
      const path = `/${lang.code}/${BASE_SCHEDULE_PATH}`;
      const pageData = await this.getPageData(path);

      const gridModule = pageData.props.pageProps.page.items.find(
        (item) => item.type === "module" && item.name === "scheduleGrid",
      );
      if (!gridModule) {
        throw new Error(`Could not find scheduleGrid module for path: ${path}`);
      }

      for (const discipline of gridModule.data.disciplines) {
        const key = discipline.disciplineCode.toLowerCase();
        // The "cer" discipline is excluded (presumably ceremonies; confirm).
        if (key === "cer") {
          continue;
        }

        let sport = this.sports.find((s) => s.key === key);
        if (!sport) {
          sport = { key, name: {}, order: -1 };
          this.sports.push(sport);
        }
        sport.name[lang.code] = discipline.description;
        sport.order = discipline.order;
      }
    }
  }

  /**
   * Returns the calendar assembled so far.
   *
   * NOTE(review): the earlier inline implementation dropped NOCs whose English
   * name equals their key before saving; no such filter is applied here.
   * Confirm that keeping those entries is intended.
   */
  public getCalendar(): Calendar {
    return {
      languages: this.languages,
      sports: this.sports,
      nocs: this.nocs,
      events: this.events,
    };
  }

  /**
   * Runs a full scrape: languages, then sports, then events, then writes
   * `calendar.json` and generates the ICS feeds.
   */
  public async scrape(): Promise<void> {
    this.debug("scrape");
    await this.scrapeLanguages();
    await this.scrapeSports();
    await this.scrapeEvents();
    this.saveCalendar();
    new ICSGenerator(this.getCalendar()).generate();
  }
}

View File

@ -1,108 +1,41 @@
{
// Visit https://aka.ms/tsconfig to read more about this file
"compilerOptions": {
/* Visit https://aka.ms/tsconfig to read more about this file */
// File Layout
// "rootDir": "./src",
"outDir": "./dist",
/* Projects */
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
// Environment Settings
// See also https://aka.ms/tsconfig/module
"module": "nodenext",
"lib": ["esnext"],
"types": ["node"],
// and npm install -D @types/node
/* Language and Environment */
"target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
// "jsx": "preserve", /* Specify what JSX code is generated. */
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
// Other Outputs
"sourceMap": true,
"declaration": true,
"declarationMap": true,
/* Modules */
"module": "commonjs", /* Specify what module code is generated. */
// "rootDir": "./", /* Specify the root folder within your source files. */
// "moduleResolution": "node10", /* Specify how TypeScript looks up a file from a given module specifier. */
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
// "resolveJsonModule": true, /* Enable importing .json files. */
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
// Stricter Typechecking Options
"noUncheckedIndexedAccess": true,
"exactOptionalPropertyTypes": true,
/* JavaScript Support */
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
// Style Options
// "noImplicitReturns": true,
// "noImplicitOverride": true,
// "noUnusedLocals": true,
// "noUnusedParameters": true,
// "noFallthroughCasesInSwitch": true,
// "noPropertyAccessFromIndexSignature": true,
/* Emit */
// "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
"outDir": "./dist", /* Specify an output folder for all emitted files. */
// "removeComments": true, /* Disable emitting comments. */
// "noEmit": true, /* Disable emitting files from a compilation. */
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
// "newLine": "crlf", /* Set the newline character for emitting files. */
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
/* Interop Constraints */
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
// "isolatedDeclarations": true, /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
/* Type Checking */
"strict": true, /* Enable all strict type-checking options. */
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
/* Completeness */
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
"skipLibCheck": true /* Skip type checking all .d.ts files. */
// Recommended Options
"strict": true,
"jsx": "react-jsx",
"verbatimModuleSyntax": false,
"isolatedModules": true,
"noUncheckedSideEffectImports": true,
"moduleDetection": "force",
"skipLibCheck": true,
}
}

59
scraper/types.d.ts vendored
View File

@ -1,4 +1,3 @@
/**
 * String-to-string dictionary; used below for an event's `name` and `location`.
 * NOTE(review): keys are presumably language codes (e.g. "en") — confirm
 * against the scraper output.
 */
export interface MultilingualString {
[key: string]: string;
}
@ -30,7 +29,7 @@ export interface Event {
end: string;
sport: string;
isTraining: boolean;
medal: '0' | '1' | '3';
medal: "0" | "1" | "3";
name: MultilingualString;
location: MultilingualString;
match?: Match;
@ -42,3 +41,59 @@ export interface Calendar {
events: Event[];
nocs: Team[];
}
/**
 * Typed view of the nested page-data payload read by the scraper.
 *
 * Only the fields declared here are described; the real payload may carry more.
 * NOTE(review): the `props.pageProps` nesting looks like a Next.js page-data
 * object taken from the scraped site — confirm against the scraper code.
 */
export interface PageData {
  props: {
    pageProps: {
      page: {
        /** Page chrome; only the language switcher of the main navigation is typed. */
        template: {
          properties: {
            header: {
              mainNav: {
                languages: Array<{
                  link: string;
                  lang: string;
                  label: string;
                }>;
              };
            };
          };
        };
        /** Content modules of the page, each with its own `type`, `name` and data. */
        items: Array<{
          type: string;
          name: string;
          data: {
            disciplines: Array<{
              disciplineCode: string;
              order: number;
              description: string;
            }>;
            schedules: Array<{
              units: Array<{
                unitCode: string;
                startDateTimeUtc: string;
                endDateTimeUtc: string;
                isTraining: boolean;
                /** Same literal union as `Event.medal`. */
                medal: "0" | "1" | "3";
                description: string;
                venue: {
                  description: string;
                };
                /** Optional pairing of two teams for this unit. */
                match?: {
                  team1: {
                    teamCode: string;
                    description: string;
                  };
                  team2: {
                    teamCode: string;
                    description: string;
                  };
                };
              }>;
            }>;
          };
        }>;
      };
    };
  };
}

2
ui/.gitignore vendored
View File

@ -40,4 +40,4 @@ yarn-error.log*
*.tsbuildinfo
next-env.d.ts
/public/data/
/data/

21
ui/Dockerfile Normal file
View File

@ -0,0 +1,21 @@
# Stage 1 (base): install dependencies from the package manifests only, so the
# install layer can be reused as long as package*.json is unchanged.
FROM node:lts-alpine AS base
WORKDIR /app
COPY package*.json ./
RUN npm install

# Stage 2 (build): add the sources on top of the installed dependencies and run
# the project's build script.
FROM base AS build
WORKDIR /app
COPY . .
RUN npm run build

# Stage 3 (runner): ship only what the standalone server needs.
FROM node:lts-alpine AS runner
WORKDIR /app
# The standalone output already contains the traced subset of node_modules the
# server requires, so the full dependency tree (dev dependencies included) from
# the base stage is intentionally NOT copied into the runtime image.
COPY --from=build /app/public ./public
COPY --from=build /app/.next/standalone ./
# Static assets are not part of the standalone folder and must be copied next to it.
COPY --from=build /app/.next/static ./.next/static
EXPOSE 3000
ENV PORT=3000
# Listen on all interfaces so the published port is reachable from outside the container.
ENV HOSTNAME="0.0.0.0"
CMD ["node", "server.js"]

View File

@ -0,0 +1,24 @@
import { promises as fs } from "fs";
import { NextResponse } from "next/server";
import path from "path";
/** Absolute folder served by this route, resolved against the process working directory. */
const DATA_FOLDER = path.resolve("data");

/** Content types for the file extensions the UI requests from this route. */
const CONTENT_TYPES: Record<string, string> = {
  ".ics": "text/calendar; charset=utf-8",
  ".json": "application/json; charset=utf-8",
};

/**
 * Serves a file from DATA_FOLDER, addressed by the catch-all `slug` path segments.
 *
 * @param request Incoming request (unused; the file is selected by `params` only).
 * @param params Route parameters; `slug` holds the path segments below the data folder.
 * @returns 200 with the file content, or 404 with "File not found" when the slug is
 *          missing, points outside DATA_FOLDER, or the file cannot be read.
 */
export async function GET(
  request: Request,
  { params }: { params: Promise<{ slug?: string[] | undefined }> }
): Promise<NextResponse> {
  try {
    const { slug } = await params;
    if (!slug || slug.length === 0) throw new Error("No file requested");

    // Slug segments are untrusted, URL-decoded input: a segment such as "../.."
    // would otherwise let the caller read arbitrary files. path.join normalizes
    // the result, so a simple prefix check is enough to enforce containment.
    const filePath = path.join(DATA_FOLDER, ...slug);
    if (!filePath.startsWith(DATA_FOLDER + path.sep)) {
      throw new Error(`Refusing to serve a path outside of the data folder: ${filePath}`);
    }

    const content = await fs.readFile(filePath);

    const headers = new Headers();
    const contentType = CONTENT_TYPES[path.extname(filePath).toLowerCase()];
    if (contentType) headers.set("Content-Type", contentType);

    return new NextResponse(content, { status: 200, headers });
  } catch (ex) {
    // Every failure (bad slug, missing file, read error) is reported as 404 so
    // the response does not reveal anything about the server's file system.
    console.log(ex);
    return new NextResponse("File not found", { status: 404 });
  }
}

View File

@ -1,6 +1,6 @@
export default function Flag({ iso3, name }: { iso3: string; name: string }) {
const iso3to2 = {
const iso3to2: { [key: string]: string } = {
AFG: "AF",
ALA: "AX",
ALB: "AL",

View File

@ -3,7 +3,6 @@
import { loadSchedule } from "../lib/data";
import { useEffect, useState } from "react";
import Flag from "./flag";
import { useSearchParams } from "next/navigation";
import { COPY, COPY_SUCCESS, FILTER_BY_COUNTRY, FILTER_BY_SPORT } from "../lib/text";
import useLocalStorage from "@/lib/local-storage";
@ -52,7 +51,7 @@ interface Calendar {
const COLORS = ['azzurro', 'giallo', 'rosa', 'rosso', 'verde', 'viola'];
export default function Home() {
const qs = useSearchParams();
const qs = typeof window !== 'undefined' ? window.location.search ? new URLSearchParams(window.location.search) : new URLSearchParams() : new URLSearchParams();
const [data, setData] = useState<Calendar | null>(null);
const [language, setLanguage] = useLocalStorage('lang', (navigator.language || 'en').split('-')[0]);
@ -95,7 +94,7 @@ export default function Home() {
const noc = qs.get('noc') || 'calendar';
const sport = qs.get('sport') || 'all-sports';
return `http://${host}/data/${language}/${sport}/${noc}.ics`;
return `http://${host}/api/data/${language}/${sport}/${noc}.ics`;
};
const getColor = (i: number) => COLORS[i % COLORS.length];

View File

@ -1,5 +1,5 @@
/**
 * Fetches and parses `/api/data/calendar.json`.
 *
 * @returns The parsed JSON body of the response.
 * @throws Error when the server answers with a non-2xx status.
 */
export const loadSchedule = async () => {
  const response = await fetch('/api/data/calendar.json');
  // An error response is not guaranteed to carry JSON; fail with the HTTP
  // status instead of an opaque JSON parse error.
  if (!response.ok) {
    throw new Error(`Failed to load schedule: ${response.status} ${response.statusText}`);
  }
  return response.json();
};

View File

@ -1,7 +1,7 @@
import type { NextConfig } from "next";
// Next.js configuration for the UI.
const nextConfig: NextConfig = {
/* "standalone" makes the build emit a self-contained server under .next/standalone (run with `node server.js`) */
output: "standalone",
};
export default nextConfig;