~cadence/bibliogram

91022aa5da82c15daedf7b3dc81e299722133a89 — Cadence Ember 26 days ago d660c84 master
Support loading shortcodes of a single image
M package-lock.json => package-lock.json +21 -0
@@ 1107,6 1107,11 @@
        "picomatch": "^2.0.4"
      }
    },
    "apollojs": {
      "version": "1.3.0",
      "resolved": "https://registry.npmjs.org/apollojs/-/apollojs-1.3.0.tgz",
      "integrity": "sha1-X3sAME2XQOKnvltSx8CAfVH5JV4="
    },
    "append-transform": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/append-transform/-/append-transform-2.0.0.tgz",


@@ 1948,6 1953,22 @@
      "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==",
      "dev": true
    },
    "fast-html-parser": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/fast-html-parser/-/fast-html-parser-1.0.1.tgz",
      "integrity": "sha1-TsyWg7i7ea/hGlCAe3hT55JWzqI=",
      "requires": {
        "apollojs": "^1.3.0",
        "entities": "^1.1.1"
      },
      "dependencies": {
        "entities": {
          "version": "1.1.2",
          "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz",
          "integrity": "sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w=="
        }
      }
    },
    "fast-json-stable-stringify": {
      "version": "2.1.0",
      "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz",

M package.json => package.json +1 -0
@@ 18,6 18,7 @@
  "dependencies": {
    "better-sqlite3": "^7.4.4",
    "cookie": "^0.4.1",
    "fast-html-parser": "^1.0.1",
    "feed": "git+https://git.sr.ht/~cadence/nodejs-feed#3dde82f8296d7a6f5659323e497e0c684f03ab71",
    "get-stream": "^6.0.1",
    "gm": "^1.23.1",

M src/lib/collectors.js => src/lib/collectors.js +87 -4
@@ 4,6 4,7 @@ const switcher = require("./utils/torswitcher")
const {extractSharedData} = require("./utils/body")
const {TtlCache, RequestCache, UserRequestCache} = require("./cache")
const RequestHistory = require("./structures/RequestHistory")
const fhp = require("fast-html-parser")
const db = require("./db")
require("./testimports")(constants, request, extractSharedData, UserRequestCache, RequestHistory, db)



@@ 398,6 399,7 @@ async function getOrFetchShortcode(shortcode) {
		const {result, fromCache} = await fetchShortcodeData(shortcode)
		const entry = getOrCreateShortcode(shortcode)
		entry.applyN3(result)
		entry.fullyUpdated = true // we already called fetchShortcodeData, which fetches the greatest amount of data possible. it's no use trying to fetch that again with .update().
		return {post: entry, fromCache}
	}
}


@@ 412,11 414,92 @@ function fetchShortcodeData(shortcode) {
		return switcher.request("post_graphql", `https://www.instagram.com/p/${shortcode}/embed/captioned/`, async res => {
			if (res.status === 429) throw constants.symbols.RATE_LIMITED
		}).then(res => res.text()).then(text => {
			const textData = text.match(/window\.__additionalDataLoaded\('extra',(.*)\);<\/script>/)[1]
			let data = JSON.parse(textData)
			let data = null
			const match = text.match(/window\.__additionalDataLoaded\('extra',(.*)\);<\/script>/)
			if (match) {
				const textData = match[1]
				data = JSON.parse(textData)
			}
			if (data == null) {
				// the thing doesn't exist
				throw constants.symbols.NOT_FOUND
				// we have to actually parse the HTML to get the data
				const root = fhp.parse(text)

				// Check if post really exists
				if (root.querySelector(".EmbedIsBroken")) {
					throw constants.symbols.NOT_FOUND
				}

				// find embed
				const e_embed = root.querySelector(".Embed")
				// find avatar
				const e_avatar = root.querySelector(".Avatar")
				const e_avatarImage = e_avatar.querySelector("img")
				// find username
				const e_usernameText = root.querySelector(".UsernameText")
				const e_viewProfile = root.querySelector(".ViewProfileButton")
				// find verified
				const e_verified = root.querySelector(".VerifiedSprite")
				// find media
				const e_media = root.querySelector(".EmbeddedMediaImage")
				// find caption
				const e_caption = root.querySelector(".Caption")
				// extract owner
				const owner = {
					id: e_embed.attributes["data-owner-id"],
					is_verified: !!e_verified,
					profile_pic_url: e_avatarImage.attributes.src,
					username: e_viewProfile.attributes.href.replace(new RegExp(`^https:\/\/www\.instagram\.com\/(${constants.external.username_regex}).*$`, "s"), "$1")
				}
				// extract media type
				let mediaType = e_embed.attributes["data-media-type"]
				const videoData = {}
				if (mediaType === "GraphVideo") {
					Object.assign(videoData, {
						video_url: null,
						video_view_count: null
					})
				} else {
					mediaType = "GraphImage"
				}
				// extract display resources
				const display_resources = e_media.attributes.srcset.split(",").map(source => {
					source = source.trim()
					const [url, widthString] = source.split(" ")
					const width = +widthString.match(/\d+/)[0]
					return {
						src: url,
						config_width: width,
						config_height: width // best guess!
					}
				})
				// extract caption text
				const captionText = e_caption.childNodes.slice(4, -3).map(node => { // slice removes unneeded starting and ending whitespace and user handles
					if (node.tagName === "br") {
						return "\n"
					} else {
						return node.text
					}
				}).join("")
				return {
					__typename: mediaType,
					id: e_embed.attributes["data-media-id"],
					display_url: e_media.attributes.src,
					display_resources,
					is_video: mediaType === "GraphVideo",
					shortcode,
					accessibility_caption: e_media.attributes.alt,
					...videoData,
					owner,
					edge_media_to_caption: {
						edges: [
							{
								node: {
									text: captionText
								}
							}
						]
					}
				}
			} else {
				data = data.shortcode_media
				history.report("post", true)

M src/lib/structures/TimelineChild.js => src/lib/structures/TimelineChild.js +2 -2
@@ 5,10 5,10 @@ const {compile} = require("pug")
require("../testimports")(collectors)

// Pug template for an image child as embedded in RSS output.
// width/height are read through `entry.data.dimensions && ...` so that an entry
// without a `dimensions` object does not throw while rendering; the expression
// then evaluates to a falsy value instead of a number.
const rssImageTemplate = compile(`
img(src=constants.website_origin+entry.getDisplayUrlP() alt=entry.getAlt() width=entry.data.dimensions && entry.data.dimensions.width height=entry.data.dimensions && entry.data.dimensions.height)
`)
// Pug template for a video child as embedded in RSS output.
// Uses the same guarded access to `entry.data.dimensions` as the image template.
const rssVideoTemplate = compile(`
video(src=constants.website_origin+entry.getVideoUrlP() controls preload="auto" width=entry.data.dimensions && entry.data.dimensions.width height=entry.data.dimensions && entry.data.dimensions.height)
`)

class TimelineChild extends TimelineBaseMethods {

M src/lib/structures/TimelineEntry.js => src/lib/structures/TimelineEntry.js +14 -8
@@ 19,6 19,7 @@ each child in children
class TimelineEntry extends TimelineBaseMethods {
	constructor() {
		super()
		this.fullyUpdated = false
		/** @type {import("../types").TimelineEntryAll} some properties may not be available yet! */
		// @ts-ignore
		this.data = {}


@@ 38,12 39,16 @@ class TimelineEntry extends TimelineBaseMethods {
	}

	async update() {
		return collectors.fetchShortcodeData(this.data.shortcode).then(data => {
			this.applyN3(data.result)
		}).catch(error => {
			console.error("TimelineEntry could not self-update; trying to continue anyway...")
			console.error("E:", error)
		})
		if (!this.fullyUpdated) {
			return collectors.fetchShortcodeData(this.data.shortcode).then(data => {
				this.applyN3(data.result)
			}).catch(error => {
				console.error("TimelineEntry could not self-update; trying to continue anyway...")
				console.error("E:", error)
			}).finally(() => {
				this.fullyUpdated = true
			})
		}
	}

	/**


@@ 88,6 93,7 @@ class TimelineEntry extends TimelineBaseMethods {
	 * All mutations should act exactly once and have no effect on already mutated data.
	 */
	fixData() {
		this.hasDate = !!this.data.taken_at_timestamp
		this.date = new Date(this.data.taken_at_timestamp*1000)
	}



@@ 237,7 243,7 @@ class TimelineEntry extends TimelineBaseMethods {
		let fromCache = true
		const clone = await (async () => {
			// Do we just already have the extended owner?
			if (this.data.owner.full_name) { // this property is on extended owner and not basic owner
			if (this.data.owner.profile_pic_url) { // this property is on extended owner and not basic owner
				const clone = proxyExtendedOwner(this.data.owner)
				this.ownerPfpCacheP = clone.profile_pic_url
				return clone


@@ 246,7 252,7 @@ class TimelineEntry extends TimelineBaseMethods {
			else if (collectors.userRequestCache.getByID(this.data.owner.id)) {
				/** @type {import("./User")} */
				const user = collectors.userRequestCache.getByID(this.data.owner.id)
				if (user.data.full_name !== undefined) {
				if (user.data.profile_pic_url !== undefined) {
					this.data.owner = {
						id: user.data.id,
						username: user.data.username,

M src/site/pug/includes/post.pug => src/site/pug/includes/post.pug +5 -4
@@ 38,15 38,16 @@ mixin post(post, headerWithNavigation)
								- let caption = post.children[0].data.accessibility_caption
								if caption
									p.description= caption
							p.description
								span!= ll.pug_post_timestamp({post})
							if post.hasDate
								p.description
									span!= ll.pug_post_timestamp({post})

		section.images-gallery
			for entry in post.children
				if entry.isVideo()
					video(src=entry.getVideoUrlP() controls preload="auto" width=entry.data.dimensions.width height=entry.data.dimensions.height).sized-video
					video(src=entry.getVideoUrlP() controls preload="auto" width=entry.data.dimensions && entry.data.dimensions.width height=entry.data.dimensions && entry.data.dimensions.height).sized-video
				else
					img(src=entry.getDisplayUrlP() alt=entry.getAlt() width=entry.data.dimensions.width height=entry.data.dimensions.height).sized-image
					img(src=entry.getDisplayUrlP() alt=entry.getAlt() width=entry.data.dimensions && entry.data.dimensions.width height=entry.data.dimensions && entry.data.dimensions.height).sized-image
				if willDisplayAltInGallery
					- let caption = entry.data.accessibility_caption
					if caption

M src/site/pug/post.pug => src/site/pug/post.pug +6 -4
@@ 33,13 33,15 @@ html
		if firstEntry.isVideo()
			meta(property="og:video" content=`${website_origin}${firstEntry.getVideoUrlP()}`)
			meta(property="og:video:type" content="video/mp4")
			meta(property="og:video:width" content=firstEntry.data.dimensions.width)
			meta(property="og:video:height" content=firstEntry.data.dimensions.height)
			if firstEntry.data.dimensions
				meta(property="og:video:width" content=firstEntry.data.dimensions.width)
				meta(property="og:video:height" content=firstEntry.data.dimensions.height)
			meta(property="og:video:alt" content=firstEntry.getAlt())
		else
			meta(property="og:image" content=`${website_origin}${firstEntry.getDisplayUrlP()}`)
			meta(property="og:image:width" content=firstEntry.data.dimensions.width)
			meta(property="og:image:height" content=firstEntry.data.dimensions.height)
			if firstEntry.data.dimensions
				meta(property="og:image:width" content=firstEntry.data.dimensions.width)
				meta(property="og:image:height" content=firstEntry.data.dimensions.height)
			meta(property="og:image:type" content="image/jpeg")
			meta(property="og:image:alt" content=firstEntry.getAlt())
		meta(property="og:site_name" content="Bibliogram")