<?php

class FacebookBridge extends BridgeAbstract
{
    // const MAINTAINER = 'teromene, logmanoriginal';
    const NAME = 'Facebook Bridge | Main Site';
    const URI = 'https://www.facebook.com/';
    const CACHE_TIMEOUT = 1800; // 30min
    const DESCRIPTION = 'Input a page title or a profile log. For a profile log,
 please insert the parameter as follow : myExamplePage/132621766841117';

    const PARAMETERS = [
        'User' => [
            'u' => [
                'name' => 'Username',
                'required' => true
            ],
            'media_type' => [
                'name' => 'Media type',
                'type' => 'list',
                'required' => false,
                'values' => [
                    'All' => 'all',
                    'Video' => 'video',
                    'No Video' => 'novideo'
                ],
                'defaultValue' => 'all'
            ],
            'skip_reviews' => [
                'name' => 'Skip reviews',
                'type' => 'checkbox',
                'required' => false,
                'defaultValue' => false,
                'title' => 'Feed includes reviews when unchecked'
            ]
        ],
        'Group' => [
            'g' => [
                'name' => 'Group',
                'type' => 'text',
                'required' => true,
                'exampleValue' => 'https://www.facebook.com/groups/743149642484225',
                'title' => 'Insert group name or facebook group URL'
            ]
        ],
        'global' => [
            'limit' => [
                'name' => 'Limit',
                'type' => 'number',
                'required' => false,
                'title' => 'Specify the number of items to return (default: -1)',
                'defaultValue' => -1
            ]
        ]
    ];

    private $authorName = '';
    private $groupName = '';

    public function getIcon()
    {
        return 'https://static.xx.fbcdn.net/rsrc.php/yo/r/iRmz9lCMBD2.ico';
    }

    public function getName()
    {
        switch ($this->queriedContext) {
            case 'User':
                if (!empty($this->authorName)) {
                    return $this->extraInfos['name'] ?? $this->authorName;
                }
                break;

            case 'Group':
                if (!empty($this->groupName)) {
                    return $this->groupName;
                }
                break;
        }

        return parent::getName();
    }

    public function detectParameters($url)
    {
        $params = [];

        // By profile
        $regex = '/^(https?:\/\/)?(www\.)?facebook\.com\/profile\.php\?id\=([^\/?&\n]+)?(.*)/';
        if (preg_match($regex, $url, $matches) > 0) {
            $params['context'] = 'User';
            $params['u'] = urldecode($matches[3]);
            return $params;
        }

        // By group
        $regex = '/^(https?:\/\/)?(www\.)?facebook\.com\/groups\/([^\/?\n]+)?(.*)/';
        if (preg_match($regex, $url, $matches) > 0) {
            $params['context'] = 'Group';
            $params['g'] = urldecode($matches[3]);
            return $params;
        }

        // By username
        $regex = '/^(https?:\/\/)?(www\.)?facebook\.com\/([^\/?\n]+)/';

        if (preg_match($regex, $url, $matches) > 0) {
            $params['context'] = '';
            $params['u'] = urldecode($matches[3]);
            return $params;
        }

        return null;
    }

    public function getURI()
    {
        $uri = self::URI;

        switch ($this->queriedContext) {
            case 'Group':
                // Discover groups via  https://www.facebook.com/groups/
                // Example group:       https://www.facebook.com/groups/sailors.worldwide
                $uri .= 'groups/' . $this->sanitizeGroup(filter_var($this->getInput('g'), FILTER_SANITIZE_URL));
                break;

            case 'User':
                // Example user 1:      https://www.facebook.com/artetv/
                // Example user 2:      artetv
                $user = $this->sanitizeUser($this->getInput('u'));

                if (!strpos($user, '/')) {
                    $uri .= urlencode($user) . '/posts';
                } else {
                    $uri .= 'pages/' . $user;
                }

                break;
        }

        // Request the mobile version to reduce page size (no javascript)
        // More information: https://stackoverflow.com/a/11103592
        return $uri .= '?_fb_noscript=1';
    }

    public function collectData()
    {
        switch ($this->queriedContext) {
            case 'Group':
                $this->collectGroupData();
                break;

            case 'User':
                $this->collectUserData();
                break;

            default:
                returnClientError('Unknown context: "' . $this->queriedContext . '"!');
        }

        $limit = $this->getInput('limit') ?: -1;

        if ($limit > 0 && count($this->items) > $limit) {
            $this->items = array_slice($this->items, 0, $limit);
        }
    }

    #region Group

    private function collectGroupData()
    {
        if (getEnv('HTTP_ACCEPT_LANGUAGE')) {
            $header = ['Accept-Language: ' . getEnv('HTTP_ACCEPT_LANGUAGE')];
        } else {
            $header = [];
        }

        $touchURI = str_replace(
            'https://www.facebook',
            'https://touch.facebook',
            $this->getURI()
        );

        $html = getSimpleHTMLDOM($touchURI, $header);

        if (!$this->isPublicGroup($html)) {
            returnClientError('This group is not public! RSS-Bridge only supports public groups!');
        }

        defaultLinkTo($html, substr(self::URI, 0, strlen(self::URI) - 1));

        $this->groupName = $this->extractGroupName($html);

        $posts = $html->find('div.story_body_container')
            or returnServerError('Failed finding posts!');

        foreach ($posts as $post) {
            $item = [];

            $item['uri'] = $this->extractGroupPostURI($post);
            $item['title'] = $this->extractGroupPostTitle($post);
            $item['author'] = $this->extractGroupPostAuthor($post);
            $item['content'] = $this->extractGroupPostContent($post);
            $item['enclosures'] = $this->extractGroupPostEnclosures($post);

            $this->items[] = $item;
        }
    }

    private function sanitizeGroup($group)
    {
        if (
            filter_var(
                $group,
                FILTER_VALIDATE_URL,
                FILTER_FLAG_PATH_REQUIRED
            )
        ) {
            // User provided a URL

            $urlparts = parse_url($group);

            $this->validateHost($urlparts['host']);

            return explode('/', $urlparts['path'])[2];
        } elseif (strpos($group, '/') !== false) {
            returnClientError('The group you provided is invalid: ' . $group);
        } else {
            return $group;
        }
    }

    private function validateHost($provided_host)
    {
        // Handle mobile links
        if (strpos($provided_host, 'm.') === 0) {
            $provided_host = substr($provided_host, strlen('m.'));
        }
        if (strpos($provided_host, 'touch.') === 0) {
            $provided_host = substr($provided_host, strlen('touch.'));
        }

        $facebook_host = parse_url(self::URI)['host'];

        if (
            $provided_host !== $facebook_host
            && 'www.' . $provided_host !== $facebook_host
        ) {
            returnClientError('The host you provided is invalid! Received "'
                . $provided_host
                . '", expected "'
                . $facebook_host
                . '"!');
        }
    }

    /**
     * @param $html simple_html_dom
     * @return bool
     */
    private function isPublicGroup($html)
    {
        // Facebook touch just presents a login page for non-public groups
        $title = $html->find('title', 0);
        return $title->plaintext !== 'Log in to Facebook | Facebook';
    }

    private function extractGroupName($html)
    {
        $ogtitle = $html->find('._de1', 0)
            or returnServerError('Unable to find group title!');

        return html_entity_decode($ogtitle->plaintext, ENT_QUOTES);
    }

    private function extractGroupPostURI($post)
    {
        $elements = $post->find('a')
            or returnServerError('Unable to find URI!');

        foreach ($elements as $anchor) {
            // Find the one that is a permalink
            if (strpos($anchor->href, 'permalink') !== false) {
                $arr = explode('?', $anchor->href, 2);
                return $arr[0];
            }
        }

        return null;
    }

    private function extractGroupPostContent($post)
    {
        $content = $post->find('div._5rgt', 0)
            or returnServerError('Unable to find user content!');

        $context_text = $content->innertext;
        if ($content->next_sibling() !== null) {
            $context_text .= $content->next_sibling()->innertext;
        }
        return $context_text;
    }

    private function extractGroupPostAuthor($post)
    {
        $element = $post->find('h3 a', 0)
            or returnServerError('Unable to find author information!');

        return $element->plaintext;
    }

    private function extractGroupPostEnclosures($post)
    {
        $elements = $post->find('span._6qdm');
        if ($post->find('div._5rgt', 0)->next_sibling() !== null) {
            array_push($elements, ...$post->find('div._5rgt', 0)->next_sibling()->find('i.img'));
        }

        $enclosures = [];

        $background_img_regex = '/background-image: ?url\\((.+?)\\);/';

        foreach ($elements as $enclosure) {
            if (preg_match($background_img_regex, $enclosure, $matches) > 0) {
                $bg_img_value = trim(html_entity_decode($matches[1], ENT_QUOTES), "'\"");
                $bg_img_url = urldecode(preg_replace('/\\\([0-9a-z]{2}) /', '%$1', $bg_img_value));
                $enclosures[] = urldecode($bg_img_url);
            }
        }

        return empty($enclosures) ? null : $enclosures;
    }

    private function extractGroupPostTitle($post)
    {
        $element = $post->find('h3', 0)
            or returnServerError('Unable to find title!');

        if (strpos($element->plaintext, 'shared') === false) {
            $content = strip_tags($this->extractGroupPostContent($post));

            return $this->extractGroupPostAuthor($post)
            . ' posted: '
            . substr(
                $content,
                0,
                strpos(wordwrap($content, 64), "\n")
            )
            . '...';
        }

        return $element->plaintext;
    }

    #endregion (Group)

    #region User

    /**
     * Checks if $user is a valid username or URI and returns the username
     */
    private function sanitizeUser($user)
    {
        if (filter_var($user, FILTER_VALIDATE_URL)) {
            $urlparts = parse_url($user);

            $this->validateHost($urlparts['host']);

            if (
                !array_key_exists('path', $urlparts)
                || $urlparts['path'] === '/'
            ) {
                returnClientError('The URL you provided doesn\'t contain the user name!');
            }

            return explode('/', $urlparts['path'])[1];
        } else {
            // First character cannot be a forward slash
            if (strpos($user, '/') === 0) {
                returnClientError('Remove leading slash "/" from the username!');
            }

            return $user;
        }
    }

    /**
     * Bypass external link redirection
     */
    private function unescapeFacebookLink($content)
    {
        return preg_replace_callback('/ href=\"([^"]+)\"/i', function ($matches) {
            if (is_array($matches) && count($matches) > 1) {
                $link = $matches[1];

                if (strpos($link, 'facebook.com/l.php?u=') !== false) {
                    $link = urldecode(extractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
                }

                return ' href="' . $link . '"';
            }
        }, $content);
    }

    /**
     * Remove Facebook's tracking code
     */
    private function removeTrackingCodes($content)
    {
        return preg_replace_callback('/ href=\"([^"]+)\"/i', function ($matches) {
            if (is_array($matches) && count($matches) > 1) {
                $link = $matches[1];

                if (strpos($link, 'facebook.com') !== false) {
                    if (strpos($link, '?') !== false) {
                        $link = substr($link, 0, strpos($link, '?'));
                    }
                }
                return ' href="' . $link . '"';
            }
        }, $content);
    }

    /**
     * Convert textual representation of emoticons back to ASCII emoticons.
     * i.e. "<i><u>smile emoticon</u></i>" => ":)"
     */
    private function unescapeFacebookEmote($content)
    {
        return preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', function ($matches) {
            static $facebook_emoticons = [
                    'smile' => ':)',
                    'frown' => ':(',
                    'tongue' => ':P',
                    'grin' => ':D',
                    'gasp' => ':O',
                    'wink' => ';)',
                    'pacman' => ':<',
                    'grumpy' => '>_<',
                    'unsure' => ':/',
                    'cry' => ':\'(',
                    'kiki' => '^_^',
                    'glasses' => '8-)',
                    'sunglasses' => 'B-)',
                    'heart' => '<3',
                    'devil' => ']:D',
                    'angel' => '0:)',
                    'squint' => '-_-',
                    'confused' => 'o_O',
                    'upset' => 'xD',
                    'colonthree' => ':3',
                    'like' => '&#x1F44D;'];

            $len = count($matches);

            if ($len > 1) {
                for ($i = 1; $i < $len; $i++) {
                    foreach ($facebook_emoticons as $name => $emote) {
                        if ($matches[$i] === $name) {
                            return $emote;
                        }
                    }
                }
            }

            return $matches[0];
        }, $content);
    }

    /**
     * Returns the captcha message for the given captcha
     */
    private function returnCaptchaMessage($captcha)
    {
        // Save form for submitting after getting captcha response
        if (session_status() == PHP_SESSION_NONE) {
            session_start();
        }

        $captcha_fields = [];

        foreach ($captcha->find('input, button') as $input) {
            $captcha_fields[$input->name] = $input->value;
        }

        $_SESSION['captcha_fields'] = $captcha_fields;
        $_SESSION['captcha_action'] = $captcha->find('form', 0)->action;

        // Show captcha filling form to the viewer, proxying the captcha image
        $img = base64_encode(getContents($captcha->find('img', 0)->src));

        header('Content-Type: text/html', true, 500);

        $message = <<<EOD
<form method="post" action="?{$_SERVER['QUERY_STRING']}">
<h2>Facebook captcha challenge</h2>
<p>Unfortunately, rss-bridge cannot fetch the requested page.<br />
Facebook wants rss-bridge to resolve the following captcha:</p>
<p><img src="data:image/png;base64,{$img}" /></p>
<p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />
<input type="submit" value="Submit!" /></p>
</form>
EOD;

        die($message);
    }

    /**
     * Checks if a capture response was received and tries to load the contents
     * @return mixed null if no capture response was received, simplhtmldom document otherwise
     */
    private function handleCaptchaResponse()
    {
        if (isset($_POST['captcha_response'])) {
            if (session_status() == PHP_SESSION_NONE) {
                session_start();
            }

            if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action'])) {
                $captcha_action = $_SESSION['captcha_action'];
                $captcha_fields = $_SESSION['captcha_fields'];
                $captcha_fields['captcha_response'] = preg_replace('/[^a-zA-Z0-9]+/', '', $_POST['captcha_response']);

                $header = [
                    'Content-type: application/x-www-form-urlencoded',
                    'Referer: ' . $captcha_action,
                    'Cookie: noscript=1'
                ];

                $opts = [
                    CURLOPT_POST => 1,
                    CURLOPT_POSTFIELDS => http_build_query($captcha_fields)
                ];

                $html = getSimpleHTMLDOM($captcha_action, $header, $opts);

                return $html;
            }

            unset($_SESSION['captcha_fields']);
            unset($_SESSION['captcha_action']);
        }

        return null;
    }

    private function collectUserData()
    {
        $html = $this->handleCaptchaResponse();

        // Retrieve page contents
        if (is_null($html)) {
            if (getEnv('HTTP_ACCEPT_LANGUAGE')) {
                $header = ['Accept-Language: ' . getEnv('HTTP_ACCEPT_LANGUAGE')];
            } else {
                $header = [];
            }

            $url = $this->getURI();
            $html = getSimpleHTMLDOM($url, $header);
        }

        // Handle captcha form?
        $captcha = $html->find('div.captcha_interstitial', 0);

        if (!is_null($captcha)) {
            $this->returnCaptchaMessage($captcha);
        }

        // No captcha? We can carry on retrieving page contents :)
        // First, we check whether the page is public or not
        $loginForm = $html->find('._585r', 0);

        if ($loginForm != null) {
            returnServerError('You must be logged in to view this page. This is not supported by RSS-Bridge.');
        }

        $mainColumn = $html->find('#pagelet_timeline_main_column');
        if (!$mainColumn) {
            throw new \Exception(sprintf('Unable to find anything useful in %s', $url));
        }

        $element = $mainColumn[0]
            ->children(0)
            ->children(0)
            ->next_sibling()
            ->children(0);

        if (isset($element)) {
            $author = str_replace(' - Posts | Facebook', '', $html->find('title#pageTitle', 0)->innertext);

            $profilePic = $html->find('meta[property="og:image"]', 0)->content;

            $this->authorName = $author;

            foreach ($element->children() as $cell) {
                // Manage summary posts
                if (strpos($cell->class, '_3xaf') !== false) {
                    $posts = $cell->children();
                } else {
                    $posts = [$cell];
                }

                // Optionally skip reviews
                if (
                    $this->getInput('skip_reviews')
                    && !is_null($cell->find('#review_composer_container', 0))
                ) {
                    continue;
                }

                foreach ($posts as $post) {
                    // Check media type
                    switch ($this->getInput('media_type')) {
                        case 'all':
                            break;
                        case 'video':
                            if (empty($post->find('[aria-label=Video]'))) {
                                continue 2;
                            }
                            break;
                        case 'novideo':
                            if (!empty($post->find('[aria-label=Video]'))) {
                                continue 2;
                            }
                            break;
                        default:
                            break;
                    }

                    $item = [];

                    if (count($post->find('abbr')) > 0) {
                        $content = $post->find('.userContentWrapper', 0);

                        // This array specifies filters applied to all posts in order of appearance
                        $content_filters = [
                            '._5mly', // Remove embedded videos (the preview image remains)
                            '._2ezg', // Remove "Views ..."
                            '.hidden_elem', // Remove hidden elements (they are hidden anyway)
                            '.timestampContent', // Remove relative timestamp
                            '._6spk', // Remove redundant separator
                        ];

                        foreach ($content_filters as $filter) {
                            foreach ($content->find($filter) as $subject) {
                                $subject->outertext = '';
                            }
                        }

                        // Change origin tag for embedded media from div to paragraph
                        foreach ($content->find('._59tj') as $subject) {
                            $subject->outertext = '<p>' . $subject->innertext . '</p>';
                        }

                        // Change title tag for embedded media from anchor to paragraph
                        foreach ($content->find('._3n1k a') as $anchor) {
                            $anchor->outertext = '<p>' . $anchor->innertext . '</p>';
                        }

                        $content = preg_replace(
                            '/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i',
                            '',
                            $content
                        );

                        $content = preg_replace(
                            '/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i',
                            '',
                            $content
                        );

                        // Remove "SpSonsSoriSsés"
                        $content = preg_replace(
                            '/(?iU)<a [^>]+ href="#" role="link" [^>}]+>.+<\/a>/iU',
                            '',
                            $content
                        );

                        // Remove html nodes, keep only img, links, basic formatting
                        $content = strip_tags($content, '<a><img><i><u><br><p>');

                        $content = $this->unescapeFacebookLink($content);

                        // Clean useless html tag properties and fix link closing tags
                        foreach (
                            [
                            'onmouseover',
                            'onclick',
                            'target',
                            'ajaxify',
                            'tabindex',
                            'class',
                            'style',
                            'data-[^=]*',
                            'aria-[^=]*',
                            'role',
                            'rel',
                            'id'] as $property_name
                        ) {
                            $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content);
                        }

                        $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);

                        $this->unescapeFacebookEmote($content);

                        // Restore links in the post before further parsing
                        $post = defaultLinkTo($post, self::URI);

                        // Restore links in the content before adding to the item
                        $content = defaultLinkTo($content, self::URI);

                        $content = $this->removeTrackingCodes($content);

                        // Retrieve date of the post
                        $date = $post->find('abbr')[0];

                        if (isset($date) && $date->hasAttribute('data-utime')) {
                            $date = $date->getAttribute('data-utime');
                        } else {
                            $date = 0;
                        }

                        // Build title from content
                        $title = strip_tags($post->find('.userContent', 0)->innertext);
                        if (strlen($title) > 64) {
                            $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...';
                        }

                        $uri = $post->find('abbr')[0]->parent()->getAttribute('href');

                        // Extract fbid and patch link
                        if (strpos($uri, '?') !== false) {
                            $query = substr($uri, strpos($uri, '?') + 1);
                            parse_str($query, $query_params);
                            if (isset($query_params['story_fbid'])) {
                                $uri = self::URI . $query_params['story_fbid'];
                            } else {
                                $uri = substr($uri, 0, strpos($uri, '?'));
                            }
                        }

                        //Build and add final item
                        $item['uri'] = htmlspecialchars_decode($uri, ENT_QUOTES);
                        $item['content'] = htmlspecialchars_decode($content, ENT_QUOTES);
                        $item['title'] = htmlspecialchars_decode($title, ENT_QUOTES);
                        $item['author'] = htmlspecialchars_decode($author, ENT_QUOTES);
                        $item['timestamp'] = $date;

                        if (strpos($item['content'], '<img') === false) {
                            $item['enclosures'] = [$profilePic];
                        }

                        $this->items[] = $item;
                    }
                }
            }
        }
    }

    #endregion (User)
}