mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-12-18 17:10:29 +03:00
101 lines
6.4 KiB
Python
101 lines
6.4 KiB
Python
import os.path
from datetime import datetime
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
|
|
|
|
# This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge
|
|
#
|
|
# This will scrape the whitelisted bridges in the current state (port 3000) and the PR state (port 3001) of
# RSS-Bridge, generate a feed for every context of each bridge and upload the resulting HTML to termpad.com.
# A link to every upload is collected in comment.txt as a markdown table.
# It also replaces the default static CSS link with a hardcoded link to @em92's public instance, so viewing
# the uploaded HTML will actually work as designed.
|
|
|
|
def testBridges(bridges,status):
|
|
for bridge in bridges:
|
|
if bridge.get('data-ref'): # Some div entries are empty, this ignores those
|
|
bridgeid = bridge.get('id')
|
|
bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata
|
|
bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html'
|
|
forms = bridge.find_all("form")
|
|
formid = 1
|
|
for form in forms:
|
|
# a bridge can have multiple contexts, named 'forms' in html
|
|
# this code will produce a fully working formstring that should create a working feed when called
|
|
# this will create an example feed for every single context, to test them all
|
|
formstring = ''
|
|
errormessages = []
|
|
parameters = form.find_all("input")
|
|
lists = form.find_all("select")
|
|
# this for/if mess cycles through all available input parameters, checks if it required, then pulls
|
|
# the default or examplevalue and then combines it all together into the formstring
|
|
# if an example or default value is missing for a required attribute, it will throw an error
|
|
# any non-required fields are not tested!!!
|
|
for parameter in parameters:
|
|
if parameter.get('type') == 'hidden' and parameter.get('name') == 'context':
|
|
cleanvalue = parameter.get('value').replace(" ","+")
|
|
formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue
|
|
if parameter.get('type') == 'number' or parameter.get('type') == 'text':
|
|
if parameter.has_attr('required'):
|
|
if parameter.get('placeholder') == '':
|
|
if parameter.get('value') == '':
|
|
errormessages.append(parameter.get('name'))
|
|
else:
|
|
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value')
|
|
else:
|
|
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder')
|
|
# same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring
|
|
if parameter.get('type') == 'checkbox':
|
|
if parameter.has_attr('checked'):
|
|
formstring = formstring + '&' + parameter.get('name') + '=on'
|
|
for list in lists:
|
|
selectionvalue = ''
|
|
for selectionentry in list.contents:
|
|
if 'selected' in selectionentry.attrs:
|
|
selectionvalue = selectionentry.get('value')
|
|
break
|
|
if selectionvalue == '':
|
|
selectionvalue = list.contents[0].get('value')
|
|
formstring = formstring + '&' + list.get('name') + '=' + selectionvalue
|
|
if not errormessages:
|
|
# if all example/default values are present, form the full request string, run the request, replace the static css
|
|
# file with the url of em's public instance and then upload it to termpad.com, a pastebin-like-site.
|
|
r = requests.get(URL + bridgestring + formstring)
|
|
pagetext = r.text.replace('static/HtmlFormat.css','https://feed.eugenemolotov.ru/static/HtmlFormat.css')
|
|
pagetext = pagetext.encode("utf_8")
|
|
termpad = requests.post(url="https://termpad.com/", data=pagetext)
|
|
termpadurl = termpad.text
|
|
termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
|
|
termpadurl = termpadurl.replace('\n','')
|
|
with open(os.getcwd() + '/comment.txt', 'a+') as file:
|
|
file.write("\n")
|
|
file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
|
|
else:
|
|
# if there are errors (which means that a required value has no example or default value), log out which error appeared
|
|
termpad = requests.post(url="https://termpad.com/", data=str(errormessages))
|
|
termpadurl = termpad.text
|
|
termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
|
|
termpadurl = termpadurl.replace('\n','')
|
|
with open(os.getcwd() + '/comment.txt', 'a+') as file:
|
|
file.write("\n")
|
|
file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
|
|
formid += 1
|
|
|
|
# Both ports are defined in the corresponding workflow .yml file.
ports = {"current": "3000", "pr": "3001"}
gitstatus = ["current", "pr"]

# One timestamp for the whole run; testBridges reads date_time for every table row.
now = datetime.now()
date_time = now.strftime("%Y-%m-%d, %H:%M:%S")

# Start comment.txt with the markdown table header; rows are appended by testBridges.
with open(os.getcwd() + '/comment.txt', 'w+') as file:
    file.write(
        " ## Pull request artifacts\n"
        "| file | last change |\n"
        "| ---- | ------ |"
    )

# Run this twice, once for the current version, once for the PR version.
for status in gitstatus:
    port = ports[status]
    URL = "http://localhost:" + port  # testBridges reads this global as the base address
    # Grab the rss-bridge main page and let bs4 turn it into soup.
    page = requests.get(URL)
    soup = BeautifulSoup(page.content, "html.parser")
    # Every bridge on the main page is a <section> element.
    bridges = soup.find_all("section")
    # Run the main scraping code, telling it which version the bridges belong to.
    testBridges(bridges, status)
|