~cadence/bibliogram

fd65ef664610a9d78d4f5f06bce7d358bb3e5b7d — Cadence Ember a month ago 39c8d71 master
Feeble attempt to support the new multi-format

This doesn't work, but it's a start. Somebody else can continue the effort.
3 files changed, 29 insertions(+), 16 deletions(-)

M src/lib/collectors.js
M src/lib/constants.js
M src/lib/utils/body.js
M src/lib/collectors.js => src/lib/collectors.js +5 -9
@@ 1,12 1,12 @@
const constants = require("./constants")
const {request} = require("./utils/request")
const switcher = require("./utils/torswitcher")
const {extractPreloader} = require("./utils/body")
const {selectExtractor} = require("./utils/body")
const {TtlCache, RequestCache, UserRequestCache} = require("./cache")
const RequestHistory = require("./structures/RequestHistory")
const fhp = require("fast-html-parser")
const db = require("./db")
require("./testimports")(constants, request, extractPreloader, UserRequestCache, RequestHistory, db)
require("./testimports")(constants, request, selectExtractor, UserRequestCache, RequestHistory, db)

const requestCache = new RequestCache(constants.caching.resource_cache_time)
/** @type {import("./cache").UserRequestCache<import("./structures/User")|import("./structures/ReelUser")>} */


@@ 34,7 34,7 @@ async function fetchUser(username, context) {
		return fetchUserFromHTML(username)
	}

	throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please use "iweb" instead (the default).`)
	throw new Error(`Your instance admin selected fetch mode ${mode}, which is now unsupported. Please ask them to use the default fetch mode by omitting that setting.`)
}

/**


@@ 68,12 68,8 @@ function fetchUserFromHTML(username) {
				// require down here or have to deal with require loop. require cache will take care of it anyway.
				// User -> Timeline -> TimelineEntry -> collectors -/> User
				const User = require("./structures/User")
				const preloader = extractPreloader(text)
				const profileInfoResponse = preloader.find(x => x.request.url === "/api/v1/users/web_profile_info/")
				if (!profileInfoResponse) {
					throw new Error("No profile info in the preloader.")
				}
				const user = new User(JSON.parse(profileInfoResponse.result.response).data.user)
				const userData = selectExtractor(text)
				const user = new User(userData)
				history.report("user", true)
				if (constants.caching.db_user_id) {
					const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username)

M src/lib/constants.js => src/lib/constants.js +1 -1
@@ 30,7 30,7 @@ let constants = {
		password: null, // If `null`, Bibliogram will run its own Tor process instead.
		port: 9051, // If a password is provided, Bibliogram will connect to Tor on this port. (This is ignored when running its own Tor process.)
		for: {
			user_html: false,
			user_html: true,
			timeline_graphql: false,
			post_graphql: false,
			reel_graphql: false

M src/lib/utils/body.js => src/lib/utils/body.js +23 -6
@@ 1,6 1,16 @@
const constants = require("../constants")
const {Parser} = require("./parser/parser")

/**
 * Look for a known format marker in `text` and hand the text to the
 * extractor registered for the first marker that is present.
 *
 * @param {string} text text to search for the format markers
 * @returns {any} whatever the selected extractor returns
 * @throws {symbol} `constants.symbols.extractor_results.NO_SHARED_DATA` when no marker is found
 */
function selectExtractor(text) {
	// Order matters: the `window._sharedData = ` marker wins when both are present.
	const extractorsByMarker = [
		["window._sharedData = ", extractSharedData],
		["PolarisQueryPreloaderCache", extractPreloader]
	]
	const chosen = extractorsByMarker.find(([marker]) => text.includes(marker))
	if (!chosen) {
		// Failure is reported by throwing the result symbol itself, not an Error.
		throw constants.symbols.extractor_results.NO_SHARED_DATA
	}
	const [, extract] = chosen
	return extract(text)
}

/**
 * @param {string} text
 * @returns {{status: symbol, value: any}}


@@ 12,21 22,22 @@ function extractSharedData(text) {
		// Maybe the profile is age restricted?
		const age = getRestrictedAge(text)
		if (age !== null) { // Correct.
			return {status: constants.symbols.extractor_results.AGE_RESTRICTED, value: age}
			throw constants.symbols.extractor_results.AGE_RESTRICTED
		}
		return {status: constants.symbols.extractor_results.NO_SHARED_DATA, value: null}
		throw constants.symbols.extractor_results.NO_SHARED_DATA
	}
	parser.store()
	const end = parser.seek(";</script>")
	parser.restore()
	const sharedDataString = parser.slice(end - parser.cursor)
	const sharedData = JSON.parse(sharedDataString)
	console.log(sharedData)
	// check for alternate form of age restrictions
	if (sharedData.entry_data && sharedData.entry_data.HttpGatedContentPage) {
		// lazy fix; ideally extracting the age should be done here, but for the web ui it doesn't matter
		return {status: constants.symbols.extractor_results.AGE_RESTRICTED, value: null}
		// ideally extracting the age should be done here, but for the web ui it doesn't matter
		throw constants.symbols.extractor_results.AGE_RESTRICTED
	}
	return {status: constants.symbols.extractor_results.SUCCESS, value: sharedData}
	return sharedData.entry_data.ProfilePage[0].graphql.user
}

/**


@@ 43,7 54,12 @@ function extractPreloader(text) {
			entries.push(data)
		}
	}
	return entries
	// entries now has the things
	const profileInfoResponse = entries.find(x => x.request.url === "/api/v1/users/web_profile_info/")
	if (!profileInfoResponse) {
		throw new Error("No profile info in the preloader.")
	}
	return JSON.parse(profileInfoResponse.result.response).data.user
}

/**


@@ 61,5 77,6 @@ function getRestrictedAge(text) {
	return +match[1] // the age
}

module.exports.selectExtractor = selectExtractor
module.exports.extractSharedData = extractSharedData
module.exports.extractPreloader = extractPreloader