fix(legifrance): emergency repair, still semi-broken (#4391)

This commit is contained in:
Dag 2025-01-03 07:23:13 +01:00 committed by GitHub
parent d36cd0a332
commit db3899f2e6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 38 additions and 32 deletions

View file

@ -10,9 +10,11 @@ class DansTonChatBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
$html = getSimpleHTMLDOM(self::URI . 'latest.html'); $url = self::URI . 'latest.html';
$dom = getSimpleHTMLDOM($url);
foreach ($html->find('div.item') as $element) { $items = $dom->find('div.item');
foreach ($items as $element) {
$item = []; $item = [];
$item['uri'] = $element->find('a', 0)->href; $item['uri'] = $element->find('a', 0)->href;
$titleContent = $element->find('h3 a', 0); $titleContent = $element->find('h3 a', 0);

View file

@ -14,6 +14,37 @@ class LegifranceJOBridge extends BridgeAbstract
private $timestamp; private $timestamp;
private $uri; private $uri;
/**
 * Collects feed items from the page at self::URI.
 *
 * Reads the optional title (h2.titleJO) and ELI link (h2.titleELI) headings,
 * derives the issue timestamp from the ELI URI, then walks the h3 / h4 / h5
 * heading hierarchy and emits one item per deepest heading found.
 *
 * The heading lookups are guarded because find(..., 0) returns null when the
 * selector matches nothing; calling string functions on the resulting null
 * URI previously raised "passing null" deprecations on every run.
 */
public function collectData()
{
    $html = getSimpleHTMLDOM(self::URI);

    // NOTE(review): assumes the class has an $author property for the issue
    // title (not visible in this view) - confirm before merging.
    $title = $html->find('h2.titleJO', 0);
    if ($title !== null) {
        $this->author = trim($title->plaintext);
    }

    // The ELI heading holds free text followed by the canonical https URI.
    $eli = $html->find('h2.titleELI', 0);
    if ($eli !== null) {
        $eliText = $eli->plaintext;
        $start = strpos($eliText, 'https');
        if ($start !== false) {
            $this->uri = trim(substr($eliText, $start));
        }
    }

    // false is what strtotime() yields for unparsable input, i.e. the value
    // this property ended up with whenever the URI was unavailable.
    $this->timestamp = false;
    $marker = 'eli/jo/';
    if (is_string($this->uri)) {
        $markerPos = strpos($this->uri, $marker);
        if ($markerPos !== false) {
            // Keep what follows the marker, minus the last five characters.
            $datePart = substr($this->uri, $markerPos + strlen($marker), -5);
            $this->timestamp = strtotime($datePart);
        }
    }

    foreach ($html->find('h3') as $section) {
        $subsections = $section->nextSibling()->find('h4');

        foreach ($subsections as $subsection) {
            $origins = $subsection->nextSibling()->find('h5');

            foreach ($origins as $origin) {
                $this->items[] = $this->extractItem($section, $subsection, $origin);
            }

            // A subsection without h5 origins is emitted as an item itself.
            if (empty($origins)) {
                $this->items[] = $this->extractItem($section, $subsection);
            }
        }

        // A section without h4 subsections is emitted as an item itself.
        if (empty($subsections)) {
            $this->items[] = $this->extractItem($section);
        }
    }
}
private function extractItem($section, $subsection = null, $origin = null) private function extractItem($section, $subsection = null, $origin = null)
{ {
$item = []; $item = [];
@ -35,7 +66,9 @@ class LegifranceJOBridge extends BridgeAbstract
$item['content'] = ''; $item['content'] = '';
foreach ($data->nextSibling()->find('a') as $content) { foreach ($data->nextSibling()->find('a') as $content) {
$text = $content->plaintext; $text = $content->plaintext;
$href = $content->nextSibling()->getAttribute('resource'); $href = '';
//$href = $content->nextSibling()->getAttribute('resource');
$item['content'] .= '<p><a href="' . $href . '">' . $text . '</a></p>'; $item['content'] .= '<p><a href="' . $href . '">' . $text . '</a></p>';
} }
return $item; return $item;
@ -45,33 +78,4 @@ class LegifranceJOBridge extends BridgeAbstract
{ {
return 'https://www.legifrance.gouv.fr/img/favicon.ico'; return 'https://www.legifrance.gouv.fr/img/favicon.ico';
} }
// Previous implementation of collectData(), shown as the removed side of the diff.
// It loads self::URI, reads the issue title and ELI URI from the page headings,
// derives a timestamp from the URI, then emits one item per deepest heading
// in the h3 / h4 / h5 hierarchy.
public function collectData()
{
// Falls through to returnServer() when the download yields a falsy value.
// NOTE(review): returnServer() is not defined in the visible code - confirm it exists.
$html = getSimpleHTMLDOM(self::URI)
or $this->returnServer('Unable to download ' . self::URI);
// Both lookups dereference find(..., 0) directly, so a missing heading is not handled here.
$this->author = trim($html->find('h2.titleJO', 0)->plaintext);
$uri = $html->find('h2.titleELI', 0)->plaintext;
// Keep the heading text from the first 'https' onwards.
$this->uri = trim(substr($uri, strpos($uri, 'https')));
// Parse what follows 'eli/jo/' in the URI, minus its last five characters, as a date.
$this->timestamp = strtotime(substr($this->uri, strpos($this->uri, 'eli/jo/') + strlen('eli/jo/'), -5));
foreach ($html->find('h3') as $section) {
$subsections = $section->nextSibling()->find('h4');
foreach ($subsections as $subsection) {
$origins = $subsection->nextSibling()->find('h5');
foreach ($origins as $origin) {
$this->items[] = $this->extractItem($section, $subsection, $origin);
}
// Origins were emitted above; skip the subsection-level item.
if (!empty($origins)) {
continue;
}
$this->items[] = $this->extractItem($section, $subsection);
}
// Subsections were handled above; skip the section-level item.
if (!empty($subsections)) {
continue;
}
$this->items[] = $this->extractItem($section);
}
}
} }