From 51cdb66f9c65c41fd1c5b0564001ec5724be1575 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tu=C4=9Fhan=20Belbek?= <destructer9@gmail.com>
Date: Thu, 17 Oct 2024 14:17:48 +0200
Subject: [PATCH] [HarvardBusinessReviewBridge] Add bridge (#4293)

---
 bridges/HarvardBusinessReviewBridge.php | 88 +++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 bridges/HarvardBusinessReviewBridge.php

diff --git a/bridges/HarvardBusinessReviewBridge.php b/bridges/HarvardBusinessReviewBridge.php
new file mode 100644
index 00000000..cd99a1ba
--- /dev/null
+++ b/bridges/HarvardBusinessReviewBridge.php
@@ -0,0 +1,88 @@
+<?php
+
+/**
+ * Bridge for the "The Latest" stream on hbr.org.
+ *
+ * Scrapes URI . '/the-latest' and turns each `li.stream-entry` element into a
+ * feed item, skipping ad containers and entries carrying the `sponsored` class.
+ */
+class HarvardBusinessReviewBridge extends BridgeAbstract
+{
+    const NAME = 'Harvard Business Review - The Latest';
+    const MAINTAINER = 'yourname';
+    const URI = 'https://hbr.org';
+    const DESCRIPTION = 'Returns the latest articles from Harvard Business Review';
+    const CACHE_TIMEOUT = 3600; // 60min
+
+    // One parameter set with a single required numeric input.
+    const PARAMETERS = [[
+        'postcount' => [
+            'name' => 'Limit',
+            'type' => 'number',
+            'required' => true,
+            'title' => 'Maximum number of items to return',
+            'defaultValue' => 6, //More requires clicking button "Load more"
+        ],
+    ]];
+
+    /**
+     * Fetch the listing page and fill $this->items with at most `postcount` entries.
+     *
+     * Entries lacking a usable link or a title are skipped; a missing author,
+     * date or summary falls back to 'Unknown', the current time and '' respectively.
+     *
+     * @return void
+     */
+    public function collectData()
+    {
+        $html = getSimpleHTMLDOM(self::URI . '/the-latest');
+        $limit = $this->getInput('postcount'); // loop-invariant, so read it once
+
+        foreach ($html->find('li.stream-entry') as $data) {
+            try {
+                // Drop entries whose markup mentions the ad container, and sponsored ones.
+                $markup = (string) $data->innertext;
+                if (strpos($markup, 'stream-ad-container') !== false || $data->hasClass('sponsored')) {
+                    continue;
+                }
+
+                $linkElement = $data->find('a', 0);
+                $titleElement = $data->find('h3.hed a', 0);
+                $authorElement = $data->find('ul.byline-list li', 0);
+                $timestampElement = $data->find('li.pubdate time', 0);
+                $contentElement = $data->find('div.dek', 0);
+
+                $href = $linkElement ? trim((string) $linkElement->getAttribute('href')) : '';
+                if ($href === '' || !$titleElement) {
+                    continue; // An item needs both a target URL and a title
+                }
+
+                // Prefix the site root only for relative hrefs; an already absolute
+                // URL would otherwise become "https://hbr.orghttps://...".
+                $isAbsolute = preg_match('#^https?://#i', $href) === 1;
+
+                // strtotime() yields false for unparsable text; treat that like a
+                // missing <time> element and fall back to the current time.
+                $timestamp = $timestampElement ? strtotime(trim($timestampElement->plaintext)) : false;
+
+                $item = [];
+                $item['uri'] = $isAbsolute ? $href : self::URI . '/' . ltrim($href, '/');
+                $item['title'] = trim($titleElement->plaintext);
+                $item['author'] = $authorElement ? trim($authorElement->plaintext) : 'Unknown';
+                $item['timestamp'] = $timestamp !== false ? $timestamp : time();
+                $item['content'] = $contentElement ? trim($contentElement->plaintext) : '';
+                // The uid is derived from the title only.
+                $item['uid'] = hash('sha256', $item['title']);
+
+                $this->items[] = $item;
+
+                if (count($this->items) >= $limit) {
+                    break;
+                }
+            } catch (Exception $e) {
+                // Best effort: a single malformed entry must not abort the whole feed.
+                continue;
+            }
+        }
+    }
+}
\ No newline at end of file