~fabrixxm/apthread

b7a260d5ee1d1e26e151d4135024c961022a663d — fabrixxm 2 months ago 6c09e9b
Refactor, archive page, attachments fixes

- move functions to `lib.php`
- add options to update and delete cache entry
- add archive page (list entries in cache)
- support video
- add 'noopener noreferrer' to links
- fix opengraph render
3 files changed, 237 insertions(+), 182 deletions(-)

A archive.php
M index.php
A lib.php
A archive.php => archive.php +20 -0
@@ 0,0 1,20 @@
<?php

require_once "lib.php";

// Archive page: list every cached thread found under t/ with a link to it.

// glob() returns false on error; fall back to an empty list in that case.
$files = glob("t/*/*/*.html") ?: [];

$title = "Archive";
$body = "<header><h1>Archive</h1></header>";
$footer = "";

$body .= "<ul>";
foreach($files as $file) {
	$page = file_get_contents($file);
	if ($page === false) {
		// unreadable cache entry: leave it out of the list
		continue;
	}
	// the scheme separator was missing ("https//"), so no host could be parsed from the url
	$og = parse_opengraph($page, "https://apthread.kirgroup.net/$file");
	$linktitle = str_replace(" -   AP Thread", "", (string)($og['og:title'] ?? $file));
	// the title is text taken from a stored page: escape it before putting it back into HTML
	$linktitle = htmlspecialchars($linktitle, ENT_QUOTES);
	$href = htmlspecialchars("/{$file}", ENT_QUOTES);
	$body .= "<li><a href='{$href}'>{$linktitle}</a></li>";
}
$body .= "</ul>";

include "page.tpl.php";

M index.php => index.php +39 -182
@@ 1,166 1,10 @@
<?php

// Debug mode is requested with `d=1` in the query string.
$debug = isset($_GET['d']) && $_GET['d'] == 1;

// While debugging, report every PHP error (startup errors included) in the response.
if ($debug) {
    error_reporting(E_ALL);
    foreach (['display_errors', 'display_startup_errors'] as $setting) {
        ini_set($setting, 1);
    }
}
/**
 * Debug helper: print an opening <pre> tag, then var_dump() every argument.
 * The <pre> element is not closed here.
 */
function d() {
    $values = func_get_args();
    print "<pre>";
    var_dump(...$values);
}

/**
 * "Dump and die": forward every argument to d(), then stop the script.
 */
function dd() {
    d(...func_get_args());
    exit();
}


// Signals a failed HTTP request; thrown by http_get() and caught by as_obj() and get_opengraph().
class HttpException extends Exception {}


/**
 * Fetch a URL with cURL and return the response body.
 *
 * Redirects are followed (at most 10) and compressed responses are decoded.
 * Only http and https are allowed, both for the request and for redirects,
 * because the URLs handed to this function come from remote documents.
 *
 * @param string $url    Address to fetch.
 * @param string $accept Value sent in the `Accept` request header.
 * @return string Response body.
 * @throws HttpException When the transfer fails or the server answers with a status >= 400.
 */
function http_get($url, $accept='application/activity+json') {
    $options = [
        CURLOPT_RETURNTRANSFER  => true,   // return web page
        CURLOPT_HEADER          => false,  // don't return headers
        CURLOPT_FOLLOWLOCATION  => true,   // follow redirects
        CURLOPT_MAXREDIRS       => 10,     // stop after 10 redirects
        CURLOPT_ENCODING        => "",     // handle compressed
        CURLOPT_USERAGENT       => "apthread", // name of client
        CURLOPT_AUTOREFERER     => true,   // set referrer on redirect
        CURLOPT_CONNECTTIMEOUT  => 120,    // time-out on connect
        CURLOPT_TIMEOUT         => 120,    // time-out on response
        CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS, // no file://, gopher://, ...
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS, // same limit when redirected
    ];

    $headers = [
        "Accept: $accept",
    ];

    $ch = curl_init($url);
    if ($ch === false) {
        throw new HttpException("HTTP Error: unable to start request");
    }
    curl_setopt_array($ch, $options);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    $server_output = curl_exec($ch);
    $res = curl_getinfo($ch);
    $errno = curl_errno($ch);
    curl_close($ch);

    // A transport failure (DNS, timeout, refused protocol...) leaves http_code at 0,
    // so it must be detected separately from HTTP error statuses.
    if ($server_output === false) {
        throw new HttpException("HTTP Error: request failed (cURL error $errno)");
    }
    if ($res['http_code'] >= 400) {
        throw new HttpException("HTTP Error " . $res['http_code']);
    }
    return $server_output;
}

/**
 * Resolve a value to a decoded object (associative array).
 *
 * - an array is returned unchanged;
 * - a string is treated as a URL: it is fetched with http_get() and decoded as JSON;
 * - anything else yields null.
 *
 * When the fetch raises HttpException, error() is called, which renders the
 * error page and ends the script.
 *
 * @param mixed $data Array, URL string, or anything else.
 * @return array|null Decoded object, or null when none could be obtained.
 */
function as_obj($data) {
    if (is_array($data)) {
        return $data;
    }
    if (!is_string($data)) {
        return null;
    }

    try {
        $decoded = json_decode(http_get($data), true);
    } catch (HttpException $e) {
        error($e->getMessage());
        return null;
    }

    // Invalid JSON decodes to null; valid JSON may still be a bare scalar.
    // Callers index the result like an object, so only arrays are passed on.
    return is_array($decoded) ? $decoded : null;
}

/**
 * Return the `id` entry when given an array; any other value is returned as is.
 */
function as_url($obj) {
    return is_array($obj) ? $obj['id'] : $obj;
}

/**
 * Return the first reply to an object, or null when there is none.
 *
 * Reads `$obj['replies']`, then its `first` page, then the first entry of that
 * page's `items`; every step is resolved through as_obj(), so each may be
 * either an embedded array or a URL.
 *
 * @param array $obj Decoded object.
 * @return array|null The first reply, or null.
 */
function get_first_reply($obj) {
    $replies = as_obj($obj['replies'] ?? null);
    if (is_null($replies)) return null;

    // a collection without a `first` page simply has no reply to follow
    $page = as_obj($replies['first'] ?? null);
    $items = $page['items'] ?? [];
    if (!is_array($items)) {
        // NOTE(review): presumably a lone item may be given without a list wrapper;
        // treat it as a one-element list instead of failing in count()
        $items = [$items];
    }

    // as_obj() maps a missing first entry (null) to null
    return as_obj($items[0] ?? null);
}

/**
 * Build a short plain-text title from the `content` HTML of an object.
 *
 * Only the first line is used (up to the first closing paragraph or <br>),
 * cut at the first of `. ! ? ( [ {` and limited to $max words; "..." is
 * appended when words were dropped.
 *
 * @param array $obj Decoded object; `content` holds HTML.
 * @param int   $max Maximum number of words kept.
 * @return string Plain text, not HTML-escaped.
 */
function title($obj, $max=8) {
    $t = $obj['content'] ?? '';
    // Put a newline after each closing paragraph and each <br>, so that
    // strip_tags() does not glue the first line to the following one.
    $t = preg_replace("%(</p>|<br\s*/?>)%i", "\$1\n", $t);
    $t = explode("\n", trim($t))[0];
    $t = strip_tags($t);
    $t = html_entity_decode($t);
    $t = preg_split("|[.!?(\[{]|", $t)[0];
    // NO_EMPTY: leading/trailing whitespace must not count as a word
    $ta = preg_split("|\s+|", $t, -1, PREG_SPLIT_NO_EMPTY);
    $tb = array_slice($ta, 0, $max);
    $t = implode(" ", $tb);
    if (count($ta) > $max) $t = $t . "...";
    return $t;
}

// Per-request cache of opengraph data, keyed by url.
$ogcache = [];

/**
 * Fetch a page and return its opengraph metadata.
 *
 * When the page has `og:*` meta tags they are returned as a `property => content`
 * map. Otherwise, when the page has a <title>, a minimal map is built from the
 * title and the url. Results are kept in $ogcache.
 *
 * @param string $url Page address.
 * @return array|null Metadata map, or null when the page cannot be fetched or has neither.
 */
function get_opengraph($url) {
    // the cache lives at file level: without this declaration it is never read nor written
    global $ogcache;

    if (isset($ogcache[$url])) {
        return $ogcache[$url];
    }

    try {
        $page = http_get($url, 'text/*');
    } catch (HttpException $e) {
        return null;
    }
    if (!is_string($page) || $page === '') {
        // nothing to parse (loadHTML() does not accept an empty document)
        return null;
    }

    $dom = new DOMDocument();
    $dom->loadHTML($page, LIBXML_NONET|LIBXML_NOERROR|LIBXML_NOWARNING);
    $xpath = new DOMXPath($dom);

    $data = null;
    $metas = $xpath->query('//meta[starts-with(@property, "og:")]');
    if ($metas->length > 0) {
        $data = [];
        foreach ($metas as $meta) {
            // a meta tag without `content` carries no value
            if (!$meta->hasAttribute('content')) continue;
            $data[$meta->getAttribute('property')] = $meta->getAttribute('content');
        }
    } else {
        $titles = $xpath->query('//title');
        if ($titles->length > 0) {
            $data = [
                'og:type' => 'article',
                'og:url' => $url,
                'og:title' => $titles->item(0)->nodeValue,
                'og:site_name' => parse_url($url,  PHP_URL_HOST),
            ];
        }
    }

    if (!is_null($data)) {
        $ogcache[$url] = $data;
    }
    return $data;
}


/**
 * Return the first http(s) URL found in the text of an object's `content` HTML.
 *
 * @param array $obj Decoded object; `content` holds HTML.
 * @return string|null The URL, or null when the text has none.
 */
function first_url_in_text($obj) {
    $t = $obj['content'] ?? '';
    // Line breaks and paragraph ends become newlines, so a URL at the end of a
    // line is not glued to the next line once tags are stripped.
    $t = preg_replace('|<br[ /]*>|', "\n", $t);
    // continue from $t: reading `content` again here would discard the <br> handling
    $t = preg_replace("|</p>([^\n])|", "</p>\n\$1", $t);
    $t = strip_tags($t);
    // the text is still entity-encoded: `&amp;` in a query string must become `&`
    $t = html_entity_decode($t);

    $toks = preg_split('|(https?://)|', $t, 2, PREG_SPLIT_DELIM_CAPTURE);
    if (!isset($toks[2])) {
        return null;
    }
    [, $prot, $res] = $toks;

    // The URL ends at the first character that is not allowed in one.
    // `&` and `%` are allowed: query separators and percent-escapes belong to the URL.
    $url = preg_split('|[^A-Za-z0-9%._~:\/?#[\]@!$&\'()*+,;=-]|', $res, 2)[0];
    return $prot . $url;
}


/**
 * Show an error page and stop.
 *
 * Appends " - Error" to the global $title and the message (as a paragraph) to
 * the global $body, renders page.tpl.php, then ends the script.
 */
function error($error) {
    global $body, $footer, $title;

    $title = $title . " - Error";
    $body = $body . "<p>" . $error . "</p>";

    include "page.tpl.php";
    exit();
}
require_once "lib.php";

$url = $_POST['url'] ?? '';
$update = ($_POST['update'] ?? '0') == '1';
$delete =  ($_POST['delete'] ?? '0') == '1';

$title = "";



@@ 172,13 16,21 @@ $body = "
    <p><label for='url'>Url: <input name='url' id='url' value='$url'></p>
    <p><button type='submit'>Render</button>
</form>";
$footer = "";
$footer = "<p><a href='archive.php'>Archive</a></p>";

if ($url  !== '') {
    if (substr($url, 0, 8) != "https://") error("Invalid request");
    $hash = hash('sha256', $url);
    $cachefile="t/" . substr($hash, 0, 2) . "/" . substr($hash, 2, 2) . "/" . substr($hash, 4, 8) . ".html";
    if (!file_exists($cachefile) || $debug) {
    if ($delete) {
        if (file_exists($cachefile)) {
            unlink($cachefile);
        }
        header("Location:/");
        die();
    }

    if (!file_exists($cachefile) || $debug || $update) {
        $obj = as_obj($url);
        if (is_null($obj) || !key_exists('attributedTo', $obj)) error("Invalid request");



@@ 194,10 46,10 @@ if ($url  !== '') {
            <header>
                <h1>$title</h1>
                <aside>
                    <img src=\"{$author['icon']['url']}\">
                    <img src='{$author['icon']['url']}'>
                    <div>
                        <a href=\"{$author_url}\">{$author['name']}</a>
                        <time datetime=\"{$obj['published']}\"><a href=\"{$obj['id']}\">{$obj['published']}</a></time>
                        <a href='{$author_url}' rel='noopener noreferrer'>{$author['name']}</a>
                        <time datetime='{$obj['published']}'><a href='{$obj['id']}' rel='noopener noreferrer'>{$obj['published']}</a></time>
                    </div>
                </aside>
            </header>


@@ 206,7 58,7 @@ if ($url  !== '') {
        $o = $obj;
        while (!is_null($o)) {
            $body .= $o['content'] . "\n\n";
            

            $attachmentsbody = "";
            $attachments = $o['attachment'] ?? [];
            $na = count($attachments);


@@ 214,7 66,10 @@ if ($url  !== '') {
                foreach($attachments as $a) {
                    [$mimemaj, $mimemin] = explode("/", $a['mediaType']);
                    if ($a['type'] == "Image" || $mimemaj == "image") {
                        $attachmentsbody .= "<a href=\"{$a['url']}\" style=\"background-image:url({$a['url']});\"></a>";
                        $attachmentsbody .= "<a href='{$a['url']}' style='background-image:url({$a['url']});' rel='noopener noreferrer'></a>";
                    }
                    if ($mimemaj == "video") {
                        $attachmentsbody .= "<video controls title='{$a['name']}'><source src='{$a['url']}' type='{$a['mediaType']}'/></video>";
                    }
                }
                if ($attachmentsbody != "") {


@@ 225,32 80,29 @@ if ($url  !== '') {
            }

            if ($attachmentsbody == "") {
                $url = first_url_in_text($o);
                if ($url) {
                    $og = get_opengraph($url);
                $url_url = first_url_in_text($o);
                if ($url_url) {
                    $og = get_opengraph($url_url);
                    if ($og) {
                        $ogtype = $og['og:type'] ?? "article";
                        $body .= "<div class='card $ogtype'>";
                        $body .= "<a class='card $ogtype' href='{$og['og:url']}' rel='noopener noreferrer'>";
                        if (isset($og['og:image']))  {
                            $body .= "<a class='card-image' href='{$og['og:image']}'>";
                            $body .= "<div class='card-image'>";
                            $body .= "<img src='{$og['og:image']}'>";
                            $body .= "</a>";
                            $body .= "</div>";
                        } else if ($ogtype == 'article') {
                            $body .= file_get_contents('imgs/rich-text-symbolic.svg');
                        }
                        $body .= "<div class='card-content'>";
                        $body .= "<a class='card-title' href='{$og['og:url']}'>";
                        if (isset($og['og:title'])) {
                            $body .= $og['og:title'];
                        } else {
                            $body .= $og['og:url'];
                        $body .= "<header class='card-title'>{$og['og:title']}</header>";
                        if (isset($og['og:description'])) {
                            $body .= "<p class='card-description'>{$og['og:description']}</p>";
                        }
                        $body .= "</a>";
                        if (isset($og['og:site_name'])) {
                            $body .= "<div class='card-site'>" . $og['og:site_name'] . "</div>";
                        }
                        $body .= "</div>";
                        $body .= "</div>";
                        $body .= "</a>";
                    }
                }
            }


@@ 260,12 112,17 @@ if ($url  !== '') {
                break;
            }
        }

        $footer = "Source: <a href=\"{$obj['id']}\">{$obj['id']}</a><br>";
        $footer = "";
        $footer .= "<form id='tools' method='post' action='/'><input type='hidden' name='url' value='{$url}'>";
        $footer .= "<p>Source: <a href='{$obj['id']}' rel='noopener noreferrer'>{$obj['id']}</a><br>";
        $footer .= "Fetched " . (new DateTime('now'))->format('c') . " - ";
        $footer .= "<button name='update' value='1'>update</button> <button name='delete' value='1'>delete</button>";
        $footer .= "</p>";
        $footer .= "</form>";

        ob_start();
        include "page.tpl.php";
        $page = ob_get_clean();
        $page = ob_get_contents();
        ob_end_clean();

        $dir = dirname($cachefile);

A lib.php => lib.php +178 -0
@@ 0,0 1,178 @@
<?php

// Debug mode is requested with `d=1` in the query string.
$debug = isset($_GET['d']) && $_GET['d'] == 1;

// While debugging, report every PHP error (startup errors included) in the response.
if ($debug) {
    error_reporting(E_ALL);
    foreach (['display_errors', 'display_startup_errors'] as $setting) {
        ini_set($setting, 1);
    }
}
/**
 * Debug helper: print an opening <pre> tag, then var_dump() every argument.
 * The <pre> element is not closed here.
 */
function d() {
    $values = func_get_args();
    print "<pre>";
    var_dump(...$values);
}

/**
 * "Dump and die": forward every argument to d(), then stop the script.
 */
function dd() {
    d(...func_get_args());
    exit();
}


// Signals a failed HTTP request; thrown by http_get() and caught by as_obj() and get_opengraph().
class HttpException extends Exception {}


/**
 * Fetch a URL with cURL and return the response body.
 *
 * Redirects are followed (at most 10) and compressed responses are decoded.
 * Only http and https are allowed, both for the request and for redirects,
 * because the URLs handed to this function come from remote documents.
 *
 * @param string $url    Address to fetch.
 * @param string $accept Value sent in the `Accept` request header.
 * @return string Response body.
 * @throws HttpException When the transfer fails or the server answers with a status >= 400.
 */
function http_get($url, $accept='application/activity+json') {
    $options = [
        CURLOPT_RETURNTRANSFER  => true,   // return web page
        CURLOPT_HEADER          => false,  // don't return headers
        CURLOPT_FOLLOWLOCATION  => true,   // follow redirects
        CURLOPT_MAXREDIRS       => 10,     // stop after 10 redirects
        CURLOPT_ENCODING        => "",     // handle compressed
        CURLOPT_USERAGENT       => "apthread", // name of client
        CURLOPT_AUTOREFERER     => true,   // set referrer on redirect
        CURLOPT_CONNECTTIMEOUT  => 120,    // time-out on connect
        CURLOPT_TIMEOUT         => 120,    // time-out on response
        CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS, // no file://, gopher://, ...
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS, // same limit when redirected
    ];

    $headers = [
        "Accept: $accept",
    ];

    $ch = curl_init($url);
    if ($ch === false) {
        throw new HttpException("HTTP Error: unable to start request");
    }
    curl_setopt_array($ch, $options);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    $server_output = curl_exec($ch);
    $res = curl_getinfo($ch);
    $errno = curl_errno($ch);
    curl_close($ch);

    // A transport failure (DNS, timeout, refused protocol...) leaves http_code at 0,
    // so it must be detected separately from HTTP error statuses.
    if ($server_output === false) {
        throw new HttpException("HTTP Error: request failed (cURL error $errno)");
    }
    if ($res['http_code'] >= 400) {
        throw new HttpException("HTTP Error " . $res['http_code']);
    }
    return $server_output;
}

/**
 * Resolve a value to a decoded object (associative array).
 *
 * - an array is returned unchanged;
 * - a string is treated as a URL: it is fetched with http_get() and decoded as JSON;
 * - anything else yields null.
 *
 * When the fetch raises HttpException, error() is called, which renders the
 * error page and ends the script.
 *
 * @param mixed $data Array, URL string, or anything else.
 * @return array|null Decoded object, or null when none could be obtained.
 */
function as_obj($data) {
    if (is_array($data)) {
        return $data;
    }
    if (!is_string($data)) {
        return null;
    }

    try {
        $decoded = json_decode(http_get($data), true);
    } catch (HttpException $e) {
        error($e->getMessage());
        return null;
    }

    // Invalid JSON decodes to null; valid JSON may still be a bare scalar.
    // Callers index the result like an object, so only arrays are passed on.
    return is_array($decoded) ? $decoded : null;
}

/**
 * Return the `id` entry when given an array; any other value is returned as is.
 */
function as_url($obj) {
    return is_array($obj) ? $obj['id'] : $obj;
}

/**
 * Return the first reply to an object, or null when there is none.
 *
 * Reads `$obj['replies']`, then its `first` page, then the first entry of that
 * page's `items`; every step is resolved through as_obj(), so each may be
 * either an embedded array or a URL.
 *
 * @param array $obj Decoded object.
 * @return array|null The first reply, or null.
 */
function get_first_reply($obj) {
    $replies = as_obj($obj['replies'] ?? null);
    if (is_null($replies)) return null;

    // a collection without a `first` page simply has no reply to follow
    $page = as_obj($replies['first'] ?? null);
    $items = $page['items'] ?? [];
    if (!is_array($items)) {
        // NOTE(review): presumably a lone item may be given without a list wrapper;
        // treat it as a one-element list instead of failing in count()
        $items = [$items];
    }

    // as_obj() maps a missing first entry (null) to null
    return as_obj($items[0] ?? null);
}

/**
 * Build a short plain-text title from the `content` HTML of an object.
 *
 * Only the first line is used (up to the first closing paragraph or <br>),
 * cut at the first of `. ! ? ( [ {` and limited to $max words; "..." is
 * appended when words were dropped.
 *
 * @param array $obj Decoded object; `content` holds HTML.
 * @param int   $max Maximum number of words kept.
 * @return string Plain text, not HTML-escaped.
 */
function title($obj, $max=8) {
    $t = $obj['content'] ?? '';
    // Put a newline after each closing paragraph and each <br>. The previous
    // pattern (`br\w*/?>`) missed `<br />` and matched the tail of tags such
    // as `</abbr>`.
    $t = preg_replace("%(</p>|<br\s*/?>)%i", "\$1\n", $t);
    $t = explode("\n", trim($t))[0];
    $t = strip_tags($t);
    $t = html_entity_decode($t);
    $t = preg_split("|[.!?(\[{]|", $t)[0];
    // NO_EMPTY: leading/trailing whitespace must not count as a word
    $ta = preg_split("|\s+|", $t, -1, PREG_SPLIT_NO_EMPTY);
    $tb = array_slice($ta, 0, $max);
    $t = implode(" ", $tb);
    if (count($ta) > $max) $t = $t . "...";
    return $t;
}

// Per-request cache of opengraph data, keyed by url.
$ogcache = [];

/**
 * Fetch a page and return its opengraph metadata (see parse_opengraph()).
 *
 * twitter.com addresses are rewritten to nitter.net before fetching.
 * Results are kept in $ogcache, keyed by the rewritten url.
 *
 * @param string $url Page address.
 * @return array|null Metadata map, or null when the page cannot be fetched.
 */
function get_opengraph($url) {
    // the cache lives at file level: without this declaration it is never read nor written
    global $ogcache;

    // twitter to nitter
    $url = str_replace("https://twitter.com/", "https://nitter.net/", $url);

    if (isset($ogcache[$url])) {
        return $ogcache[$url];
    }

    try {
        $page = http_get($url, 'text/*');
    } catch (HttpException $e) {
        return null;
    }
    if (!is_string($page)) {
        // no body to parse
        return null;
    }

    $data = parse_opengraph($page, $url);
    $ogcache[$url] = $data;
    return $data;
}

/**
 * Extract opengraph metadata from an HTML document.
 *
 * The result always has `og:type`, `og:url`, `og:title` and `og:site_name`:
 * defaults are built from the url and the page <title>, then overridden by any
 * `og:*` meta tag that has a `content` attribute. The result is also stored in
 * the global $ogcache under $url.
 *
 * @param string $page HTML source.
 * @param string $url  Address the page belongs to.
 * @return array Map of opengraph property => value.
 */
function parse_opengraph($page, $url) {
    global $ogcache;

    $page = (string)$page;
    // The previous test, `! strpos(...) !== 0`, negated strpos() first and so
    // was always true; the declaration is only needed when the page does not
    // already start with one.
    if (strncmp($page, '<?xml', 5) !== 0) {
        // force utf8. looks like load html doesn't read encoding from html tags
        $page = '<' .'?xml encoding="utf-8" ?' .'>'.$page;
    }

    $dom = new DOMDocument();
    $dom->loadHTML($page, LIBXML_NONET|LIBXML_NOERROR|LIBXML_NOWARNING);
    $xpath = new DOMXPath($dom);

    // defaults from page
    $data = [
        'og:type' => 'article',
        'og:url' => $url,
        'og:title' => $url,
        'og:site_name' => parse_url($url,  PHP_URL_HOST),
    ];
    $titles = $xpath->query('//title');
    if ($titles->length > 0) {
        $data['og:title'] = $titles->item(0)->nodeValue;
    }

    // look for opengraph meta tags
    foreach ($xpath->query('//meta[starts-with(@property, "og:")]') as $meta) {
        // a meta tag without `content` carries no value: keep the default
        if (!$meta->hasAttribute('content')) continue;
        $data[$meta->getAttribute('property')] = $meta->getAttribute('content');
    }

    $ogcache[$url] = $data;
    return $data;
}


/**
 * Return the first http(s) URL found in the text of an object's `content` HTML.
 *
 * @param array $obj Decoded object; `content` holds HTML.
 * @return string|null The URL, or null when the text has none.
 */
function first_url_in_text($obj) {
    $t = $obj['content'] ?? '';
    // Line breaks and paragraph ends become newlines, so a URL at the end of a
    // line is not glued to the next line once tags are stripped.
    $t = preg_replace('|<br[ /]*>|', "\n", $t);
    // continue from $t: reading `content` again here would discard the <br> handling
    $t = preg_replace("|</p>([^\n])|", "</p>\n\$1", $t);
    $t = strip_tags($t);
    // the text is still entity-encoded: `&amp;` in a query string must become `&`
    $t = html_entity_decode($t);

    $toks = preg_split('|(https?://)|', $t, 2, PREG_SPLIT_DELIM_CAPTURE);
    if (!isset($toks[2])) {
        return null;
    }
    [, $prot, $res] = $toks;

    // The URL ends at the first character that is not allowed in one.
    // `&` and `%` are allowed: query separators and percent-escapes belong to the URL.
    $url = preg_split('|[^A-Za-z0-9%._~:\/?#[\]@!$&\'()*+,;=-]|', $res, 2)[0];
    return $prot . $url;
}


/**
 * Show an error page and stop.
 *
 * Appends " - Error" to the global $title and the message (as a paragraph) to
 * the global $body, renders page.tpl.php, then ends the script.
 */
function error($error) {
    global $body, $footer, $title;

    $title = $title . " - Error";
    $body = $body . "<p>" . $error . "</p>";

    include "page.tpl.php";
    exit();
}