~ghuter/caoydl

b8254bb753405c1bfa43bda7a57e0e147ba95bee — ghuter 7 months ago 7062792 master
fix a bug which caused a segfault when "MAX_VIDEOS" is reached

modify arg parsing: there is now just one iteration, and one can do something like this:
`caoydl -S word and another word without quotes`

`struct Item` is now `Item`, etc.

util.c now includes util.h + new utility function: `argvcat(char**)`

add additional comments
6 files changed, 237 insertions(+), 168 deletions(-)

M caoydl.c
M node_processors.h
M util.c
M util.h
M youtube.c
M youtube.h
M caoydl.c => caoydl.c +124 -102
@@ 13,6 13,11 @@
char raw = 0;
char *argv0;

/* search modes; passed as the `ext_search` argument of search_vid() */
enum {
	SIMPLE_SEARCH = 0,	/* selected by the -s option */
	EXTENDED_SEARCH = 1,	/* selected by the -S option */
};

/* print: ignore control-characters */
void
printescape(const char *s)


@@ 34,9 39,9 @@ print_sanitized(const char *s)
}

void
print_vid(struct Item *v)
print_vid(Item *v)
{
	struct Stream *stream = v->stream;
	Stream *stream = v->stream;

	if (pledge("stdio", NULL) == -1) {
		fprintf(stderr, "pledge: %s\n", strerror(errno));


@@ 73,9 78,9 @@ print_vid(struct Item *v)
}

void
render(struct Items *r, char ext_search)
render(Items *r, char ext_search)
{
	struct Item *item = r->item;
	Item *item = r->item;
	size_t i;

	if (pledge("stdio", NULL) == -1) {


@@ 168,9 173,9 @@ render(struct Items *r, char ext_search)
}

void
render_chan_search(struct Items *r)
render_chan_search(Items *r)
{
	struct Item *chan = r->item;
	Item *chan = r->item;
	size_t i;

	if (pledge("stdio", NULL) == -1) {


@@ 190,9 195,9 @@ render_chan_search(struct Items *r)
}

void
render_chan_urls(struct Items *r)
render_chan_urls(Items *r)
{
	struct Item *vid = r->item;
	Item *vid = r->item;
	size_t i;

	if (pledge("stdio", NULL) == -1) {


@@ 212,9 217,9 @@ render_chan_urls(struct Items *r)
}

void
render_chan_pl(struct Items *r)
render_chan_pl(Items *r)
{
	struct Item *vid = r->item;
	Item *vid = r->item;
	size_t i;

	if (pledge("stdio", NULL) == -1) {


@@ 237,9 242,9 @@ render_chan_pl(struct Items *r)
}

void
render_playlist_urls(struct Items *r)
render_playlist_urls(Items *r)
{
	struct Item *vid = r->item;
	Item *vid = r->item;
	size_t i;

	if (pledge("stdio", NULL) == -1) {


@@ 269,113 274,130 @@ usage()
	exit(1);
}

int
main(int argc, char *argv[])
/*
 * Search for videos matching the query string s and render the results.
 *
 * s          : query text; it is passed through uriencode() into a
 *              1024-byte buffer, and usage() is called when that fails
 * ext_search : forwarded unchanged to render(); the callers pass
 *              SIMPLE_SEARCH or EXTENDED_SEARCH
 *
 * die() is called when srx() returns no result set or an empty one.
 *
 * NOTE(review): the `struct Items *r`, `struct Item *v`, `struct Id id` and
 * `char do_search, ...` declarations below look like lines removed from the
 * previous main() that the diff view interleaves here -- confirm against
 * the real caoydl.c.
 */
static void
search_vid(const char *s, const char ext_search)
{
	struct Items *r;
	struct Item *v;
	char search[1024];
	struct Id id;
	Items *r;

	char do_search, do_ext_search, do_search_chan, do_lspl;
	if (!uriencode(s, search, sizeof(search)))
		usage();
	/* third argument 0: the `srx_chan` flag of srx() is off (video search) */
	r = srx(search, "relevance", 0);
	if (!r || r->nitem == 0)
		die("no video found.\n");
	render(r, ext_search);
}

/*
 * Search for channels matching the query string s and print them with
 * render_chan_search().
 *
 * The query is first run through uriencode(); usage() is called when
 * that fails.  die() is called when srx() yields nothing.
 */
static void
search_chan(const char *s)
{
	char encoded[1024];
	Items *found;

	if (uriencode(s, encoded, sizeof(encoded)) == 0)
		usage();

	/* last argument 1: the `srx_chan` flag of srx() is on */
	found = srx(encoded, "relevance", 1);
	if (!(found && found->nitem > 0))
		die("no channel found.\n");

	render_chan_search(found);
}

/*
 * Print the playlists of a channel.
 *
 * s is resolved with url2id(); when the result is not of type Channel,
 * usage() is called.  The playlists are fetched with
 * get_items(..., CHAN_PLS) and printed by render_chan_pl(); die() is
 * called when nothing comes back.
 */
static void
search_chan_pl(const char *s)
{
	Id chan = url2id(s);
	Items *pls;

	if (chan.type != Channel)
		usage();

	pls = get_items(chan.id, CHAN_PLS);
	if (!(pls && pls->nitem > 0))
		die("no playlist found.\n");

	render_chan_pl(pls);
}

int
main(int argc, char *argv[])
{
	argv0 = argv[0];

	if (pledge("stdio dns inet rpath unveil", NULL) < 0) {
		fprintf(stderr, "pledge: %s\n", strerror(errno));
		exit(1);
		die("pledge: %s\n", strerror(errno));
	}
	if (unveil(TLS_CA_CERT_FILE, "r") < 0) {
		fprintf(stderr, "unveil: %s\n", strerror(errno));
		exit(1);
		die("unveil: %s\n", strerror(errno));
	}
	if (unveil(NULL, NULL) < 0) {
		fprintf(stderr, "unveil: %s\n", strerror(errno));
		exit(1);
		die("unveil: %s\n", strerror(errno));
	}

	if (argc < 2 || !argv[1][0])
		usage();

	do_search = do_ext_search = do_search_chan = do_lspl = 0;

	for (int i = 1; argv[i] && argv[i][0] && i < argc; i++) {
		if (argv[i][0] == '-' && argv[i][1] && !argv[i][2]) {
			switch (argv[i][1]) {
			case 'r':
				raw = 1;
				break;
			case 's':
				do_search = 1;
				break;
			case 'S':
				do_ext_search = 1;
				break;
			case 'c':
				do_search_chan = 1;
				break;
			case 'P':
				do_lspl = 1;
				break;
			default:
				usage();
				break;
			}
		} else if (do_search || do_ext_search) {
			if (!uriencode(argv[i], search, sizeof(search)))
				usage();
			r = srx(search, "relevance", 0);
			if (!r || r->nitem == 0)
				die("no video found.\n");
			render(r, do_ext_search);
		} else if (do_search_chan) {
			if (!uriencode(argv[i], search, sizeof(search)))
				usage();
			r = srx(search, "relevance", 1);
			if (!r || r->nitem == 0)
				die("no channel found.\n");
			render_chan_search(r);
		} else {
			id = url2id(argv[i]);

			switch (id.type) {
			case Video:
				v = get_vid(id.id);
				if (!v)
					die("unable to retrieve video information.\n");
				if (!raw)
					print_vid(v);
				break;
			case Channel:
				if (do_lspl) {
					r = get_items(id.id, CHAN_PLS);
					if (!r || r->nitem == 0)
						die("no playlist found.\n");
					render_chan_pl(r);
					do_lspl = 0;
					break;
				}
				r = get_items(id.id, CHAN_VIDS);
				if (!r)
					die("empty response.\n");
				if (r->nitem == 0)
					die("zero item found.\n");
				if (!raw)
					render_chan_urls(r);
				break;
			case Playlist:
				r = get_items(id.id, PL);
				if (!r)
					die("empty response.\n");
				if (r->nitem == 0)
					die("zero item found.\n");
				if (!raw)
				        render_playlist_urls(r);
				break;
			default:
				usage();
			}
	/*
	 * Accepted command lines:
	 *   [-r] -s|-S|-c word...   search; all remaining words form the query
	 *   [-r] -P channel         list the playlists of a channel
	 *   [-r] url-or-id          act on a video, channel or playlist
	 */
	int i = 1;
	if (argv[i][0] == '-' && argv[i][1] == 'r' && argv[i][2] == '\0') {
		raw = 1;
		i++;
		/* "-r" on its own: argv[i] is now the terminating NULL */
		if (i >= argc || !argv[i][0])
			usage();
	}

	if (argv[i][0] == '-' && argv[i][1] != '\0' && argv[i][2] == '\0') {
		/* every option below needs at least one operand after it */
		if (argc < i + 2)
			usage();

		switch (argv[i][1]) {
		case 's':
			search_vid(argvcat(&argv[i + 1]), SIMPLE_SEARCH);
			break;
		case 'S':
			search_vid(argvcat(&argv[i + 1]), EXTENDED_SEARCH);
			break;
		case 'c':
			search_chan(argvcat(&argv[i + 1]));
			break;
		case 'P':
			search_chan_pl(argv[i + 1]);
			break;
		default:
			usage();
			break;
		}
		exit(0);
	}

	Item *v = NULL;
	Items *r = NULL;
	/*
	 * Use argv[i], not argv[1]: when "-r" was given the url is the next
	 * argument, and argv[1] would be the "-r" flag itself.
	 */
	Id id = url2id(argv[i]);

	switch (id.type) {
	case Video:
		v = get_vid(id.id);
		if (!v)
			die("error while retrieving video info.\n");
		if (!raw)
			print_vid(v);
		break;
	case Channel:
		r = get_items(id.id, CHAN_VIDS);
		if (!r)
			die("empty response.\n");
		if (r->nitem == 0)
			die("zero item found.\n");
		if (!raw)
			render_chan_urls(r);
		break;
	case Playlist:
		r = get_items(id.id, PL);
		if (!r)
			die("empty response.\n");
		if (r->nitem == 0)
			die("zero item found.\n");
		if (!raw)
			render_playlist_urls(r);
		break;
	default:
		usage();
	}

	return 0;

M node_processors.h => node_processors.h +15 -15
@@ 27,8 27,8 @@ static void
processnode_search(struct json_node *nodes, size_t depth, const char *value,
	void *pp)
{
	struct Items *r = (struct Items *)pp;
	static struct Item *vid;
	Items *r = (Items *)pp;
	static Item *vid;

	if (r->nitem > MAX_VIDEOS)
		return;


@@ 77,8 77,8 @@ static void
processnode_get_chan(struct json_node *nodes, size_t depth, const char *value,
	void *pp)
{
	struct Items *r = (struct Items *)pp;
	static struct Item *vid;
	Items *r = (Items *)pp;
	static Item *vid;

	if (r->nitem > MAX_VIDEOS)
		return;


@@ 105,8 105,8 @@ static void
processnode_chan(struct json_node *nodes, size_t depth, const char *value,
	void *pp)
{
	struct Items *r = (struct Items *)pp;
	static struct Item *vid;
	Items *r = (Items *)pp;
	static Item *vid;

	if (in_json_obj(nodes, depth, continuation_tok, nelem(continuation_tok)))
		memcpy(r->tok, value, sizeof(r->tok));


@@ 136,8 136,8 @@ static void
processnode_chan_pl(struct json_node *nodes, size_t depth, const char *value,
	void *pp)
{
	struct Items *r = (struct Items *)pp;
	static struct Item *vid;
	Items *r = (Items *)pp;
	static Item *vid;

	if (in_json_obj(nodes, depth, continuation_tok, nelem(continuation_tok)))
		memcpy(r->tok, value, sizeof(r->tok));


@@ 169,8 169,8 @@ static void
processnode_playlist(struct json_node *nodes, size_t depth, const char *value,
	void *pp)
{
	struct Items *r = (struct Items *)pp;
	static struct Item *vid;
	Items *r = (Items *)pp;
	static Item *vid;

	if (in_json_obj(nodes, depth, continuation_tok, nelem(continuation_tok)))
		memcpy(r->tok, value, sizeof(r->tok));


@@ 200,8 200,8 @@ static void
processnode_vid(struct json_node *nodes, size_t depth, const char *value,
	void *pp)
{
	struct Item *v = (struct Item*)pp;
	static struct Stream *stream;
	Item *v = (Item*)pp;
	static Stream *stream;

	if (v->nurls > MAX_URLS)
		return;


@@ 266,8 266,8 @@ static void
processnode_vid_api(struct json_node *nodes, size_t depth, const char *value,
	void *pp)
{
	struct Item *v = (struct Item*)pp;
	static struct Stream *stream;
	Item *v = (Item*)pp;
	static Stream *stream;

	if (v->nurls > MAX_URLS)
		return;


@@ 300,7 300,7 @@ static void
processnode_ytcfg(struct json_node *nodes, size_t depth, const char *value,
	void *pp)
{
	struct API_Infos *api = (struct API_Infos *)pp;
	API *api = (API *)pp;

	if (depth != 2 || nodes[depth - 1].type != TYPE_STRING)
		return;

M util.c => util.c +42 -2
@@ 4,6 4,9 @@
#include <stdlib.h>
#include <string.h>

#include "util.h"


void
die(const char *fmt, ...)
{


@@ 111,17 114,54 @@ emalloc(size_t sz)
	return p;
}

/*
 * Append the '\0'-terminated string s2 to a buffer, starting at s1.
 *
 * base : start of the destination buffer
 * cap  : total size of the destination buffer, in bytes
 * s1   : position inside [base, base + cap) where the copy of s2 begins
 * s2   : '\0'-terminated string to copy
 *
 * Returns a pointer to the '\0' written into the buffer, so the result can
 * be passed back as s1 to append another string.
 *
 *   |*********************************|
 *   ^            ^                     ^
 *   base         s1                    base + cap
 *
 * At most cap - (s1 - base) bytes, terminator included, are copied.  When
 * s1 lies outside the buffer, or s2 and its terminator do not fit in the
 * remaining space, die() is called (it is expected not to return).
 */
char*
str_cat(char *base, size_t cap, char *s1, char *s2)
{
	size_t off;	/* offset of s1 from base */
	char *end;

	/* a negative offset wraps to a huge value and is rejected as well */
	off = (size_t)(s1 - base);
	if (off >= cap)
		die("str_cat: error: s1 is outside of the buffer.\n");

	/* memccpy() returns NULL when no '\0' was met within cap - off bytes */
	end = memccpy(s1, s2, '\0', cap - off);
	if (!end)
		die("str_cat: error: s2 does not fit (or is not '\\0' terminated).\n");

	/* end points one past the copied '\0' */
	return end - 1;
}

/*
 * Join the strings of the NULL-terminated array argv into one string,
 * separating consecutive elements with a single space, so that
 * `-s some words` yields the query "some words".
 *
 * The result lives in a static buffer of BUFLEN bytes: it is overwritten by
 * the next call and must not be freed.  It is always '\0' terminated.
 * NOTE: when the joined text does not fit, it is silently truncated to
 * BUFLEN - 1 characters.
 */
char*
argvcat(char *argv[])
{
#define BUFLEN 1024
	static char buf[BUFLEN];
	size_t used = 0;	/* bytes written so far, terminator excluded */

	for (int i = 0; argv[i] && used < BUFLEN - 1; i++) {
		size_t room, len;

		/* the loop condition guarantees room for this one byte */
		if (i > 0)
			buf[used++] = ' ';

		/* copy as much of the word as fits, keeping one byte for '\0' */
		room = BUFLEN - 1 - used;
		len = strlen(argv[i]);
		if (len > room)
			len = room;
		memcpy(buf + used, argv[i], len);
		used += len;
	}

	buf[used] = '\0';
	return buf;
}

M util.h => util.h +3 -1
@@ 4,10 4,12 @@
#endif

/* number of elements of the array x (x must be an array, not a pointer) */
#define nelem(x) (sizeof(x) / sizeof((x)[0]))
/* smaller of a and b; an argument may be evaluated more than once */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

char	*humantime(char*);
int	 hexdigit(int c);
int	 uriencode(const char *s, char *buf, size_t bufsiz);
void	 die(const char *fmt, ...);
void	*emalloc(size_t);
void	*emalloc(size_t sz);
/* copy s2 into the buffer [base, base + cap) at s1; returns the written '\0' */
char	*str_cat(char *base, size_t cap, char *s1, char *s2);
/* concatenate the strings of a NULL-terminated array into a static buffer */
char	*argvcat(char *argv[]);

M youtube.c => youtube.c +43 -44
@@ 11,16 11,17 @@
#include "youtube.h"

struct Data {
	char *s;
	char *s;	/* whole buffer pointer */
	struct {
		char *start;
		char *end;
		char **lm;
		char **rm;
		char **lm;	/* left markers array */
		char **rm;	/* right markers array */
		char *start;	/* start and end delimit the substring that is */
		char *end;	/* wrapped by the left and right markers */
	};
};

struct API_Infos {
/* YouTube API metadata */
struct API {
	char key[64];
	char version[8];
	char client_name[16];


@@ 36,12 37,16 @@ struct Req {
		char *order;
	};
	struct {
		struct API_Infos *api;
		struct API *api;
		char *payload;
		char *tok;
	};
};

typedef struct Data Data;
typedef struct API API;
typedef struct Req Req;

#include "node_processors.h"

static char


@@ 51,10 56,10 @@ idchr(const unsigned char c)
	return (c > 44 && c < 123 && !strchr("./:;<=>?@[\\]^`", c));
}

struct Id
url2id(char *url)
Id
url2id(const char *url)
{
	struct Id id = {};
	Id id = {};

	struct Url2id {
		char *s;


@@ 62,11 67,12 @@ url2id(char *url)
		int ltype;
		int idlen;
	} s2id[] = {
		#define STRANDLEN(s) (s), strlen((s))
		{ STRANDLEN("v="),       Video, 11, },
		{ STRANDLEN("embed/"),   Video, 11, },
		{ STRANDLEN("channel/"), Channel, 24, },
		{ STRANDLEN("list="),    Playlist, 34, },
		#define STRDEF(s) (s), strlen((s))
		{ STRDEF("v="),       Video, 11, },
		{ STRDEF("embed/"),   Video, 11, },
		{ STRDEF("channel/"), Channel, 24, },
		{ STRDEF("list="),    Playlist, 34, },
		#undef STRDEF
	};

	/* if our particular uri scheme is in use, skip it */


@@ 79,16 85,16 @@ url2id(char *url)
	switch (len) {
	case 11:
		id.type = Video;
		id.id = url;
		id.id = (char*)url;
		break;
	case 24:
		id.type = Channel;
		id.id = url;
		id.id = (char*)url;
		break;
	case 18: /* fallthrough */
	case 34:
		id.type = Playlist;
		id.id = url;
		id.id = (char*)url;
		break;
	default:
		for (int i = 0; i < nelem(s2id); i++) {


@@ 115,7 121,7 @@ url2id(char *url)
}

static char *
yt_request(struct Req *req)
yt_request(Req *req)
{
	static char path[4096];
	static char payload[2048];


@@ 140,7 146,7 @@ yt_request(struct Req *req)
		}
	} else if (req->payload) {
		size_t len;
		struct API_Infos *api;
		API *api;

		api = req->api;



@@ 256,7 262,7 @@ extractjson(const char *s, char **start, char **end, char *lbndry[], char *rbndr
}

int
get_json(struct Data *data, struct Req *req)
get_json(Data *data, Req *req)
{
	char **start = &(data->start);
	char **end = &(data->end);


@@ 278,20 284,18 @@ get_json(struct Data *data, struct Req *req)
	return 0;
}

struct Items*
Items*
srx(char *rawsearch, char *order, char srx_chan)
{
	struct Items *r;
	struct Data data;
	struct Req req;
	Items *r;
	int ret;

	data = (struct Data){
	Data data = {
		.lm = json_left_markers,
		.rm = NULL,
	};

	req = (struct Req){
	Req req = {
		.path = "/results?search_query=%s",
		.order = order,
		.query = rawsearch,


@@ 314,23 318,21 @@ srx(char *rawsearch, char *order, char srx_chan)
	return r;
}

struct Item*
Item*
get_vid(char *id)
{
	assert(id != NULL);

	struct API_Infos *api;
	struct Item *v;
	struct Data data;
	struct Req req;
	API *api;
	Item *v;
	int ret;

	req = (struct Req){
	Req req = {
		.path = "/watch?v=%s&bpctr=9999999999&has_verified=1",
		.id = id,
	};

	data = (struct Data){
	Data data = {
		.lm = json_left_markers,
		.rm = NULL,
	};


@@ 367,7 369,7 @@ get_vid(char *id)
		memcpy(api->client_name, "ANDROID", sizeof("ANDROID"));
		memcpy(api->client_version, "16.20", sizeof("16.20"));

		req = (struct Req){
		Req req = {
			.path = "/youtubei/%s/player?key=%s",
			.api = api,
			.id = id,


@@ 393,15 395,13 @@ get_vid(char *id)
	return v;
}

struct Items*
Items*
get_items(char *id, int type)
{
	assert(id != NULL);

	struct Data data;
	struct Req req;
	struct API_Infos *api;
	struct Items *r;
	API *api;
	Items *r;
	int ret;
	void (*node_processor)(struct json_node*, size_t, const char*, void*);
	char *path;


@@ 424,12 424,12 @@ get_items(char *id, int type)
		return NULL;
	}

	data = (struct Data){
	Data data = {
		.lm = (char *[]){ ";\nytcfg.set(", NULL },
		.rm = (char *[]){ "});", NULL },
	};

	req = (struct Req){
	Req req = {
		.path = path,
		.id = id,
	};


@@ 458,8 458,7 @@ get_items(char *id, int type)

	int max_cnt = 0;
	while (r->tok[0] !=  '\0' && max_cnt < 30) {
		// fprintf(stderr, "hit!\n");
		req = (struct Req){
		Req req = {
			.path = "/youtubei/%s/browse?key=%s",
			.api = api,
			.tok = r->tok,

M youtube.h => youtube.h +10 -4
@@ 9,7 9,8 @@ enum {
	CHAN_PLS,
};

enum LinkType {
/* Link type */
enum {
	Unknown = 0,
	Channel,
	Movie,


@@ 27,7 28,7 @@ struct Item {
	/* This structure also serves as a general item structure,
	   so it can also be a channel or a playlist */
	char linktype;
	struct Stream stream[MAX_URLS];
	struct Stream stream[MAX_URLS + 1];
	size_t nurls;
	char id[64];
	char title[1024];


@@ 43,7 44,7 @@ struct Item {
};

struct Items {
	struct Item item[MAX_VIDEOS];
	struct Item item[MAX_VIDEOS + 1];
	size_t nitem;
	char tok[512];
};


@@ 53,7 54,12 @@ struct Id {
	char *id;
};

extern struct Id url2id(char *url);
typedef struct Stream Stream;
typedef struct Item Item;
typedef struct Items Items;
typedef struct Id Id;

extern struct Id url2id(const char *url);

extern struct Items* srx(char *rawsearch, char *order, char srx_chan);
extern struct Items* get_items(char *id, int type);