rss-bridge/bridges/CodebergBridge.php

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

477 lines
15 KiB
PHP
Raw Normal View History

2021-08-13 06:51:50 +03:00
<?php
class CodebergBridge extends BridgeAbstract
{
2021-08-13 06:51:50 +03:00
// Bridge metadata.
const NAME = 'Codeberg Bridge';
const URI = 'https://codeberg.org/';
const DESCRIPTION = 'Returns commits, issues, pull requests or releases for a repository.';
const MAINTAINER = 'VerifiedJoseph';

/**
 * Input definitions, keyed by context name.
 *
 * Contexts with an empty list take no inputs of their own. The 'global'
 * entry holds the repository owner and name, which every context reads
 * through getRepo().
 */
const PARAMETERS = [
    'Commits' => [
        'branch' => [
            'name' => 'branch',
            'type' => 'text',
            'exampleValue' => 'main',
            'required' => false,
            'title' => 'Optional, main branch is used by default.',
        ],
    ],
    'Issues' => [],
    'Issue Comments' => [
        'issueId' => [
            'name' => 'Issue ID',
            'type' => 'text',
            'required' => true,
            'exampleValue' => '513',
        ],
    ],
    'Pull Requests' => [],
    'Releases' => [],
    'Tags' => [],
    'global' => [
        'username' => [
            'name' => 'Username',
            'type' => 'text',
            'exampleValue' => 'Codeberg',
            'title' => 'Username of account that the repository belongs to.',
            'required' => true,
        ],
        'repo' => [
            'name' => 'Repository',
            'type' => 'text',
            'exampleValue' => 'Community',
            'required' => true,
        ],
    ],
];
2021-08-13 06:51:50 +03:00
// NOTE(review): presumably a cache lifetime in seconds (30 minutes) — confirm against BridgeAbstract.
const CACHE_TIMEOUT = 1800;

/**
 * Sample URLs mapped to the parameters detectParameters() is expected to
 * return for them.
 */
const TEST_DETECT_PARAMETERS = [
    'https://codeberg.org/Codeberg/Community/issues/507' => [
        'context' => 'Issue Comments',
        'username' => 'Codeberg',
        'repo' => 'Community',
        'issueId' => '507',
    ],
    'https://codeberg.org/Codeberg/Community/issues' => [
        'context' => 'Issues',
        'username' => 'Codeberg',
        'repo' => 'Community',
    ],
    'https://codeberg.org/Codeberg/Community/pulls' => [
        'context' => 'Pull Requests',
        'username' => 'Codeberg',
        'repo' => 'Community',
    ],
    'https://codeberg.org/Codeberg/Community/releases' => [
        'context' => 'Releases',
        'username' => 'Codeberg',
        'repo' => 'Community',
    ],
    'https://codeberg.org/Codeberg/Community/commits/branch/master' => [
        'context' => 'Commits',
        'username' => 'Codeberg',
        'repo' => 'Community',
        'branch' => 'master',
    ],
    'https://codeberg.org/Codeberg/Community/commits' => [
        'context' => 'Commits',
        'username' => 'Codeberg',
        'repo' => 'Community',
    ],
];
2021-08-13 06:51:50 +03:00
/** Branch used by the Commits context when the 'branch' input is empty. */
private $defaultBranch = 'main';

/** Issue title with its index; set by extractIssueComments() and read by getName(). */
private $issueTitle = '';

/*
 * URL patterns consumed by detectParameters(). Capture group 1 is the owner,
 * group 2 the repository, group 3 (where present) the branch or issue number.
 *
 * Owner and repository segments accept dots and hyphens in addition to word
 * characters; a plain \w+ failed to match names such as "my-user/my.repo" at
 * all. The branch group accepts slash-separated segments ("feature/x") but
 * never captures a trailing slash.
 */
private $urlRegex = '/codeberg\.org\/([\w.-]+)\/([\w.-]+)(?:\/commits\/branch\/([\w.-]+(?:\/[\w.-]+)*))?/';
private $issuesUrlRegex = '/codeberg\.org\/([\w.-]+)\/([\w.-]+)\/issues/';
private $pullsUrlRegex = '/codeberg\.org\/([\w.-]+)\/([\w.-]+)\/pulls/';
private $releasesUrlRegex = '/codeberg\.org\/([\w.-]+)\/([\w.-]+)\/releases/';
private $issueCommentsUrlRegex = '/codeberg\.org\/([\w.-]+)\/([\w.-]+)\/issues\/([0-9]+)/';
2021-08-13 06:51:50 +03:00
/**
 * Fetch the page for the queried context and pass it to the matching
 * extractor, which appends feed items to $this->items.
 *
 * @throws \Exception When the queried context has no extractor.
 */
public function collectData()
{
    $pageUrl = $this->getURI();
    $dom = defaultLinkTo(getSimpleHTMLDOM($pageUrl), $pageUrl);

    // Context name => private extractor method.
    $extractors = [
        'Commits' => 'extractCommits',
        'Issues' => 'extractIssues',
        'Issue Comments' => 'extractIssueComments',
        'Pull Requests' => 'extractPulls',
        'Releases' => 'extractReleases',
        'Tags' => 'extractTags',
    ];

    $context = (string) $this->queriedContext;
    if (!isset($extractors[$context])) {
        // The page has already been fetched at this point, as before.
        throw new \Exception('Invalid context: ' . $this->queriedContext);
    }

    $extractor = $extractors[$context];
    $this->$extractor($dom);
}
2021-08-13 06:51:50 +03:00
/**
 * Build the feed name for the queried context.
 *
 * Falls back to the parent implementation when no known context is set.
 * Note that the Commits name for the default branch carries no bridge-name
 * suffix, unlike every other variant.
 *
 * @return string
 */
public function getName()
{
    // Contexts named "<owner>/<repo><label><bridge name>".
    $labels = [
        'Issues' => ' Issues - ',
        'Pull Requests' => ' Pull Requests - ',
        'Releases' => ' Releases - ',
        'Tags' => ' Tags - ',
    ];

    $context = (string) $this->queriedContext;

    if (isset($labels[$context])) {
        return $this->getRepo() . $labels[$context] . self::NAME;
    }

    if ($context === 'Issue Comments') {
        return $this->issueTitle . ' - Issue Comments - ' . self::NAME;
    }

    if ($context !== 'Commits') {
        return parent::getName();
    }

    $branch = $this->getBranch();
    if ($branch === $this->defaultBranch) {
        return $this->getRepo() . ' Commits';
    }

    return $this->getRepo() . ' Commits (' . $branch . ' branch) - ' . self::NAME;
}
2021-08-13 06:51:50 +03:00
/**
 * Build the Codeberg page URL for the queried context.
 *
 * Falls back to the parent implementation when no known context is set.
 *
 * @return string
 */
public function getURI()
{
    // Contexts whose path under the repository is fixed.
    $fixedPaths = [
        'Issues' => '/issues/',
        'Pull Requests' => '/pulls',
        'Releases' => '/releases',
        'Tags' => '/tags',
    ];

    $context = (string) $this->queriedContext;

    if ($context === 'Commits') {
        $path = '/commits/branch/' . $this->getBranch();
    } elseif ($context === 'Issue Comments') {
        $path = '/issues/' . $this->getInput('issueId');
    } elseif (isset($fixedPaths[$context])) {
        $path = $fixedPaths[$context];
    } else {
        return parent::getURI();
    }

    return self::URI . $this->getRepo() . $path;
}
2021-08-13 06:51:50 +03:00
/**
 * Return the 'branch' input, or the default branch when that input is
 * empty (any falsy value, including the string '0', counts as empty).
 */
private function getBranch()
{
    $requested = $this->getInput('branch');

    return $requested ? $requested : $this->defaultBranch;
}
2021-08-13 06:51:50 +03:00
/**
 * Return the repository path in "<username>/<repo>" form, built from the
 * 'username' and 'repo' inputs.
 */
private function getRepo()
{
    $owner = $this->getInput('username');
    $name = $this->getInput('repo');

    return sprintf('%s/%s', $owner, $name);
}
2022-02-23 22:31:08 +03:00
/**
 * Extract commits
 *
 * Appends one feed item per row of the commits table to $this->items.
 *
 * @param object $html Parsed commits page (a DOM node exposing find()).
 * @throws \Exception When the commits table is missing from the page.
 */
private function extractCommits($html)
{
    $table = $html->find('table#commits-table', 0);
    $tbody = $table ? $table->find('tbody.commit-list', 0) : null;
    if (!$tbody) {
        // Fail with a readable message instead of a null-dereference error.
        throw new \Exception('Unable to find commits table');
    }

    foreach ($tbody->find('tr') as $tr) {
        $item = [];

        $message = $tr->find('td.message', 0);

        $item['title'] = $message->find('span.message-wrapper', 0)->plaintext;
        $item['uri'] = $tr->find('td.sha', 0)->find('a', 0)->href;
        $item['author'] = $tr->find('td.author', 0)->plaintext;

        // The commit time is read from the title attribute of a span in the
        // fourth cell; either element may be absent.
        $timeCell = $tr->find('td', 3);
        $timeSpan = $timeCell ? $timeCell->find('span', 0) : null;
        if ($timeSpan) {
            $item['timestamp'] = $timeSpan->title;
        }

        $body = $message->find('pre.commit-body', 0);
        if ($body) {
            // Clear the inline style before embedding the body in the feed.
            $body->style = '';
            $item['content'] = $body;
        } else {
            $item['content'] = '<blockquote>' . $item['title'] . '</blockquote>';
        }

        $this->items[] = $item;
    }
}
/**
 * Extract issues
 *
 * Appends one feed item per entry of the issue list to $this->items. Each
 * issue page is fetched (cached for 3600) to obtain the first comment as the
 * item content. The author is not extracted.
 *
 * @param object $html Parsed issue list page (a DOM node exposing find()).
 * @throws \Exception When the issue list is missing from the page.
 */
private function extractIssues($html)
{
    $issueList = $html->find('div#issue-list', 0);
    if (!$issueList) {
        // Fail with a readable message instead of a null-dereference error.
        throw new \Exception('Unable to find issue list');
    }

    foreach ($issueList->find('div.flex-item') as $div) {
        $titleLink = $div->find('a.issue-title', 0);
        if (!$titleLink) {
            // Without a title link there is neither a title nor a page to fetch.
            continue;
        }

        $item = [];

        // NOTE(review): the comma makes this a selector list rather than
        // "a.index.ml-0.mr-2" — kept as-is; confirm which was intended.
        $indexLink = $div->find('a.index,ml-0.mr-2', 0);
        $number = $indexLink ? trim($indexLink->plaintext) : '';

        $item['title'] = $titleLink->plaintext . ' (' . $number . ')';
        $item['uri'] = $titleLink->href;

        $time = $div->find('relative-time.time-since', 0);
        if ($time) {
            $item['timestamp'] = $time->datetime;
        }

        // Fetch issue page
        $issuePage = getSimpleHTMLDOMCached($item['uri'], 3600);
        $issuePage = defaultLinkTo($issuePage, self::URI);

        // Guarded like the equivalent lookup in extractPulls().
        $firstComment = $issuePage->find('div.timeline-item.comment.first', 0);
        if ($firstComment) {
            $item['content'] = $firstComment->find('div.render-content.markup', 0);
        }

        foreach ($div->find('a.ui.label') as $label) {
            $item['categories'][] = $label->plaintext;
        }

        $this->items[] = $item;
    }
}
/**
 * Extract issue comments
 *
 * Stores the issue title (with its index) for getName() and appends one feed
 * item per comment in the timeline to $this->items.
 *
 * @param object $html Parsed issue page (a DOM node exposing find()).
 */
private function extractIssueComments($html)
{
    $titleText = $html->find('span#issue-title', 0)->plaintext;
    $indexText = $html->find('span.index', 0)->plaintext;
    $this->issueTitle = $titleText . ' (' . $indexText . ')';

    foreach ($html->find('div.timeline-item.comment') as $comment) {
        // Entries carrying exactly this class list are skipped.
        if ($comment->class === 'timeline-item comment merge box') {
            continue;
        }

        $body = $comment->find('div.render-content.markup', 0);
        $permalink = $comment->find('span.text.grey', 0)->find('a', 1);

        $entry = [
            'title' => $this->ellipsisTitle($body->plaintext),
            'uri' => $permalink->href,
            'content' => $body,
        ];

        // Attachments, when present, are appended after the comment body.
        $attachments = $comment->find('div.dropzone-attachments', 0);
        if ($attachments) {
            $entry['content'] .= $attachments;
        }

        $entry['author'] = $comment->find('a.author', 0)->innertext;

        $postedAt = $comment->find('span.time-since', 0);
        if ($postedAt) {
            $entry['timestamp'] = $postedAt->title;
        }

        $this->items[] = $entry;
    }
}
/**
 * Extract pulls
 *
 * Appends one feed item per entry of the pull request list to $this->items.
 * Each pull request page is fetched (cached for 3600) to obtain the item
 * content. The author is not extracted.
 *
 * @param object $html Parsed pull request list page (a DOM node exposing find()).
 */
private function extractPulls($html)
{
    $list = $html->find('div#issue-list', 0);

    foreach ($list->find('div.flex-item') as $row) {
        $number = trim($row->find('a.index,ml-0.mr-2', 0)->plaintext);
        $titleLink = $row->find('a.issue-title', 0);

        $entry = [
            'title' => $titleLink->plaintext . ' (' . $number . ')',
            'uri' => $titleLink->href,
        ];

        $time = $row->find('relative-time.time-since', 0);
        if ($time) {
            $entry['timestamp'] = $time->datetime;
        }

        // Fetch pull request page
        $page = defaultLinkTo(getSimpleHTMLDOMCached($entry['uri'], 3600), self::URI);

        // NOTE(review): 'ui.timeline' selects a <ui> element with class
        // "timeline" — confirm this matches the page markup.
        $timeline = $page->find('ui.timeline', 0);
        if ($timeline) {
            $entry['content'] = $timeline->find('div.render-content.markup', 0);
        }

        foreach ($row->find('a.ui.label') as $label) {
            $entry['categories'][] = $label->plaintext;
        }

        $this->items[] = $entry;
    }
}
/**
 * Extract releases
 *
 * Appends one feed item per release to $this->items. The item content is the
 * release description followed by its tag, commit and download links.
 *
 * @param object $html Parsed releases page (a DOM node exposing find()).
 * @throws \Exception When the release list contains no releases.
 */
private function extractReleases($html)
{
    $releaseList = $html->find('ul#release-list', 0);
    $releases = $releaseList->find('li.ui.grid');
    if ([] === $releases) {
        throw new \Exception('Found zero releases');
    }

    foreach ($releases as $release) {
        $heading = $release->find('h4', 0);

        $entry = [
            'title' => $heading->plaintext,
            'uri' => $heading->find('a', 0)->href,
        ];

        // Icons are removed from the tag and commit labels before embedding.
        $tag = $this->stripSvg($release->find('span.tag', 0));
        $commit = $this->stripSvg($release->find('span.commit', 0));
        $downloads = $this->extractDownloads($release->find('details.download', 0));

        $details = <<<HTML
<strong>Tag</strong>
<p>{$tag}</p>
<strong>Commit</strong>
<p>{$commit}</p>
{$downloads}
HTML;

        $entry['content'] = $release->find('div.markup.desc', 0) . $details;
        $entry['timestamp'] = $release->find('span.time', 0)->find('span', 0)->title;
        $entry['author'] = $release->find('span.author', 0)->find('a', 0)->plaintext;

        $this->items[] = $entry;
    }
}
/**
 * Extract tags
 *
 * Appends one feed item per tag cell to $this->items, using the cell's first
 * link for the title and URI and the cell's inner HTML as content.
 *
 * @param object $html Parsed tags page (a DOM node exposing find()).
 * @throws \Exception When the page contains no tag cells.
 */
private function extractTags($html)
{
    $cells = $html->find('td.tag');
    if ([] === $cells) {
        throw new \Exception('Found zero tags');
    }

    foreach ($cells as $cell) {
        $link = $cell->find('a', 0);

        $entry = [];
        $entry['title'] = $link->plaintext;
        $entry['uri'] = $link->href;
        $entry['content'] = $cell->innertext;

        $this->items[] = $entry;
    }
}
2022-02-23 22:31:08 +03:00
/**
 * Extract downloads for a release
 *
 * Renders every link found under the given node as a list of download links.
 *
 * @param object|null $html      Node containing the download links (exposes
 *                               find()); null when the release has none.
 * @param bool        $skipFirst When true, the first link is left out.
 * @return string HTML fragment, or an empty string when $html is null.
 */
private function extractDownloads($html, $skipFirst = false)
{
    // The caller passes the result of find(..., 0), which is null when the
    // element is absent; omit the section rather than fail.
    if ($html === null) {
        return '';
    }

    $downloads = '';

    foreach ($html->find('a') as $index => $a) {
        if ($skipFirst === true && $index === 0) {
            continue;
        }

        // Reads the link's href attribute (previously misspelled "herf",
        // which produced empty link targets).
        $downloads .= <<<HTML
<a href="{$a->href}">{$a->plaintext}</a><br>
HTML;
    }

    return <<<EOD
<strong>Downloads</strong>
<p>{$downloads}</p>
EOD;
}
2022-02-23 22:31:08 +03:00
/**
 * Shorten a title to its first wrapped line and append an ellipsis.
 *
 * Text of 100 bytes or fewer is returned unchanged. Longer text is wrapped
 * at word boundaries to a width of 100 and only the first line is kept, so a
 * single leading word longer than 100 bytes is not cut.
 *
 * @param string $text
 * @return string
 */
private function ellipsisTitle($text)
{
    $limit = 100;

    if (strlen($text) <= $limit) {
        return $text;
    }

    $wrapped = wordwrap($text, $limit, '<br>');
    $lines = explode('<br>', $wrapped);

    return $lines[0] . '...';
}
2022-02-23 22:31:08 +03:00
/**
 * Strip SVG tag
 *
 * Blanks the first <svg> element found under the given node, in place.
 *
 * @param object|null $html Node exposing find(), or null.
 * @return object|null The same node, or null when null was given.
 */
private function stripSvg($html)
{
    if ($html !== null) {
        $svg = $html->find('svg', 0);
        if ($svg) {
            $svg->outertext = '';
        }
    }

    return $html;
}
/**
 * Derive bridge parameters from a Codeberg URL.
 *
 * @param string $url
 * @return array|null Array with 'context', 'username' and 'repo' (plus
 *                    'issueId' or 'branch' where the URL provides one), or
 *                    null when the URL matches no known pattern.
 */
public function detectParameters($url)
{
    // Tag URLs have no dedicated pattern property, so theirs is defined
    // here. Without it, ".../tags" fell through to the generic repository
    // pattern and was reported as the Commits context.
    $tagsUrlRegex = '/codeberg\.org\/([\w.-]+)\/([\w.-]+)\/tags/';

    // Context, pattern, and the parameter name for capture group 3 (if any).
    // Order matters: the issue-comment pattern must precede the issue-list
    // pattern, and the generic repository pattern must come last because it
    // matches every URL the others match.
    $routes = [
        ['Issue Comments', $this->issueCommentsUrlRegex, 'issueId'],
        ['Issues', $this->issuesUrlRegex, null],
        ['Pull Requests', $this->pullsUrlRegex, null],
        ['Releases', $this->releasesUrlRegex, null],
        ['Tags', $tagsUrlRegex, null],
        ['Commits', $this->urlRegex, 'branch'],
    ];

    foreach ($routes as $route) {
        list($context, $regex, $extraKey) = $route;

        if (!preg_match($regex, $url, $matches)) {
            continue;
        }

        $params = [
            'context' => $context,
            'username' => $matches[1],
            'repo' => $matches[2],
        ];

        // Group 3 is optional for commits (no branch in the URL).
        if ($extraKey !== null && isset($matches[3])) {
            $params[$extraKey] = $matches[3];
        }

        return $params;
    }

    return null;
}
2021-08-13 06:51:50 +03:00
}