~shreyasminocha/leetcode-problem-scrape

81779c67e1f19539a2c2710fd9ea3d3e4b08ebcf — Shreyas Minocha 2 months ago f4c3ccd main
Add support for solution scraping
3 files changed, 77 insertions(+), 15 deletions(-)

M index.js
M package-lock.json
M package.json
M index.js => index.js +43 -13
@@ 1,8 1,10 @@
import got from 'got';
import * as fs from 'fs/promises';
import path from 'path';
import sleep from 'sleep';

// First CLI argument: directory that receives the scraped JSON files.
const [, , dumpPath] = process.argv;
// Value sent as the LEETCODE_SESSION cookie when requesting solutions.
const SESSION = process.env.SESSION;

// `response` is declared with `let` because the per-problem loop reuses it
// for each GraphQL reply.
let response = await got('https://leetcode.com/api/problems/all/').json();
const { stat_status_pairs: problems } = response;


@@ 17,22 19,50 @@ try {

// Scrape every problem listed in `problems`: the problem statement goes to
// `<slug>.json` and its official solution (when one is returned) goes to
// `<slug>.solution.json`. Files already on disk are not re-downloaded, so an
// interrupted run can be resumed.
for (const problem of problems) {
	const slug = problem.stat.question__title_slug;

	const problemPath = path.join(dumpPath, 'problems', `${slug}.json`);
	const solutionPath = path.join(dumpPath, 'problems', `${slug}.solution.json`);

	// A failing stat() is treated as "file not downloaded yet".
	let problemFileExists = true;
	try { await fs.stat(problemPath); }
	catch { problemFileExists = false; }

	let solutionFileExists = true;
	try { await fs.stat(solutionPath); }
	catch { solutionFileExists = false; }

	if (!problemFileExists) {
		response = await got.post('https://leetcode.com/graphql', {
			json: {
				operationName: "questionData",
				variables: { titleSlug: slug },
				query: "query questionData($titleSlug: String!) {\n  question(titleSlug: $titleSlug) {\n    questionId\n    questionFrontendId\n    boundTopicId\n    title\n    titleSlug\n    content\n    translatedTitle\n    translatedContent\n    isPaidOnly\n    difficulty\n    likes\n    dislikes\n    isLiked\n    similarQuestions\n    exampleTestcases\n    categoryTitle\n    contributors {\n      username\n      profileUrl\n      avatarUrl\n      __typename\n    }\n    topicTags {\n      name\n      slug\n      translatedName\n      __typename\n    }\n    companyTagStats\n    codeSnippets {\n      lang\n      langSlug\n      code\n      __typename\n    }\n    stats\n    hints\n    solution {\n      id\n      canSeeDetail\n      paidOnly\n      hasVideoSolution\n      paidOnlyVideo\n      __typename\n    }\n    status\n    sampleTestCase\n    metaData\n    judgerAvailable\n    judgeType\n    mysqlSchemas\n    enableRunCode\n    enableTestMode\n    enableDebugger\n    envInfo\n    libraryUrl\n    adminUrl\n    challengeQuestion {\n      id\n      date\n      incompleteChallengeCount\n      streakCount\n      type\n      __typename\n    }\n    __typename\n  }\n}\n",
			},
		}).json();

		const { question } = response.data;
		// JSON.stringify(undefined) is not a string, which writeFile rejects;
		// a missing question is therefore stored as JSON null.
		// Awaited so that a failed write surfaces here instead of becoming an
		// unhandled rejection behind the blocking sleep below.
		await fs.writeFile(problemPath, JSON.stringify(question ?? null));
	}

	if (!solutionFileExists) {
		response = await got.post('https://leetcode.com/graphql', {
			json: {
				operationName: 'QuestionNote',
				variables: { titleSlug: slug },
				query: 'query QuestionNote($titleSlug: String!) {\n  question(titleSlug: $titleSlug) {\n    questionId\n    article\n    solution {\n      id\n      content\n      contentTypeId\n      canSeeDetail\n      paidOnly\n      hasVideoSolution\n      paidOnlyVideo\n      rating {\n        id\n        count\n        average\n        userRating {\n          score\n          __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n}\n'
			},
			headers: {
				'Cookie': `LEETCODE_SESSION=${SESSION}`,
				'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:103.0) Gecko/20100101 Firefox/103.0',
			},
		}).json();

		// Normalise a missing question/solution to null so nothing is written
		// for problems without a solution.
		const solution = response.data.question?.solution ?? null;
		if (solution !== null) {
			await fs.writeFile(solutionPath, JSON.stringify(solution));
		}
	}

	console.log(slug);

	// Throttle only when this iteration actually sent a request; problems that
	// are fully cached on disk are passed over without delay.
	if (!problemFileExists || !solutionFileExists) {
		sleep.sleep(4);
	}
}

M package-lock.json => package-lock.json +32 -1
@@ 5,7 5,8 @@
  "packages": {
    "": {
      "dependencies": {
        "got": "^11.8.2",
        "sleep": "^6.3.0"
      }
    },
    "node_modules/@sindresorhus/is": {


@@ 225,6 226,11 @@
        "node": ">=4"
      }
    },
    "node_modules/nan": {
      "version": "2.16.0",
      "resolved": "https://registry.npmjs.org/nan/-/nan-2.16.0.tgz",
      "integrity": "sha512-UdAqHyFngu7TfQKsCBgAA6pWDkT8MAO7d0jyOecVhN5354xbLqdn8mV9Tat9gepAupm0bt2DbeaSC8vS52MuFA=="
    },
    "node_modules/normalize-url": {
      "version": "6.1.0",
      "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-6.1.0.tgz",


@@ 285,6 291,18 @@
        "lowercase-keys": "^2.0.0"
      }
    },
    "node_modules/sleep": {
      "version": "6.3.0",
      "resolved": "https://registry.npmjs.org/sleep/-/sleep-6.3.0.tgz",
      "integrity": "sha512-+WgYl951qdUlb1iS97UvQ01pkauoBK9ML9I/CMPg41v0Ze4EyMlTgFTDDo32iYj98IYqxIjDMRd+L71lawFfpQ==",
      "hasInstallScript": true,
      "dependencies": {
        "nan": "^2.14.1"
      },
      "engines": {
        "node": ">=0.8.0"
      }
    },
    "node_modules/wrappy": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",


@@ 460,6 478,11 @@
      "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-1.0.1.tgz",
      "integrity": "sha512-j5EctnkH7amfV/q5Hgmoal1g2QHFJRraOtmx0JpIqkxhBhI/lJSl1nMpQ45hVarwNETOoWEimndZ4QK0RHxuxQ=="
    },
    "nan": {
      "version": "2.16.0",
      "resolved": "https://registry.npmjs.org/nan/-/nan-2.16.0.tgz",
      "integrity": "sha512-UdAqHyFngu7TfQKsCBgAA6pWDkT8MAO7d0jyOecVhN5354xbLqdn8mV9Tat9gepAupm0bt2DbeaSC8vS52MuFA=="
    },
    "normalize-url": {
      "version": "6.1.0",
      "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-6.1.0.tgz",


@@ 505,6 528,14 @@
        "lowercase-keys": "^2.0.0"
      }
    },
    "sleep": {
      "version": "6.3.0",
      "resolved": "https://registry.npmjs.org/sleep/-/sleep-6.3.0.tgz",
      "integrity": "sha512-+WgYl951qdUlb1iS97UvQ01pkauoBK9ML9I/CMPg41v0Ze4EyMlTgFTDDo32iYj98IYqxIjDMRd+L71lawFfpQ==",
      "requires": {
        "nan": "^2.14.1"
      }
    },
    "wrappy": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",

M package.json => package.json +2 -1
@@ 1,6 1,7 @@
{
  "type": "module",
  "dependencies": {
    "got": "^11.8.2",
    "sleep": "^6.3.0"
  }
}