# rss-bridge/.github/prtester.py

import os.path
from datetime import datetime
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup

# This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge
#
# This will scrape the whitelisted bridges in the current state (port 3000) and the PR state (port 3001) of
# RSS-Bridge, generate a feed for each of the bridges and save the output as html files.
# It also replaces the default static CSS link with a hardcoded link to @em92's public instance, so viewing
# the HTML file locally will actually work as designed.

def testBridges(bridges,status):
    for bridge in bridges:
        if bridge.get('data-ref'): # Some div entries are empty, this ignores those
            bridgeid = bridge.get('id')
            bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata
            bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html'
            forms = bridge.find_all("form")
            formid = 1
            for form in forms:
                # a bridge can have multiple contexts, named 'forms' in html
                # this code will produce a fully working formstring that should create a working feed when called
                # this will create an example feed for every single context, to test them all
                formstring = ''
                errormessages = []
                parameters = form.find_all("input")
                lists = form.find_all("select")
                # this for/if mess cycles through all available input parameters, checks if it required, then pulls
                # the default or examplevalue and then combines it all together into the formstring
                # if an example or default value is missing for a required attribute, it will throw an error
                # any non-required fields are not tested!!!
                for parameter in parameters:
                    if parameter.get('type') == 'hidden' and parameter.get('name') == 'context':
                        cleanvalue = parameter.get('value').replace(" ","+")
                        formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue
                    if parameter.get('type') == 'number' or parameter.get('type') == 'text':
                        if parameter.has_attr('required'):
                            if parameter.get('placeholder') == '':
                                if parameter.get('value') == '':
                                    errormessages.append(parameter.get('name'))
                                else:
                                    formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value')
                            else:
                                formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder')
                    # same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring
                    if parameter.get('type') == 'checkbox':
                        if parameter.has_attr('checked'):
                            formstring = formstring + '&' + parameter.get('name') + '=on'
                for list in lists:
                    selectionvalue = ''
                    for selectionentry in list.contents:
                        if 'selected' in selectionentry.attrs:
                            selectionvalue = selectionentry.get('value')
                            break
                    if selectionvalue == '':
                        selectionvalue = list.contents[0].get('value')
                    formstring = formstring + '&' + list.get('name') + '=' + selectionvalue
                if not errormessages:
                    # if all example/default values are present, form the full request string, run the request, replace the static css
                    # file with the url of em's public instance and then upload it to termpad.com, a pastebin-like-site.
                    r = requests.get(URL + bridgestring + formstring)
                    pagetext = r.text.replace('static/HtmlFormat.css','https://feed.eugenemolotov.ru/static/HtmlFormat.css')
                    pagetext = pagetext.encode("utf_8")
                    termpad = requests.post(url="https://termpad.com/", data=pagetext)
                    termpadurl = termpad.text
                    termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
                    termpadurl = termpadurl.replace('\n','')
                    with open(os.getcwd() + '/comment.txt', 'a+') as file:
                        file.write("\n")
                        file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
                else:
                    # if there are errors (which means that a required value has no example or default value), log out which error appeared
                    termpad = requests.post(url="https://termpad.com/", data=str(errormessages))
                    termpadurl = termpad.text
                    termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
                    termpadurl = termpadurl.replace('\n','')
                    with open(os.getcwd() + '/comment.txt', 'a+') as file:
                        file.write("\n")
                        file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
                formid += 1

# The two checkouts under test and the local port each one is served on.
# Both ports are defined in the corresponding workflow .yml file.
gitstatus = ["current", "pr"]
ports = {"current": "3000", "pr": "3001"}

# Timestamp written into the "last change" column of every table row.
now = datetime.now()
date_time = now.strftime("%Y-%m-%d, %H:%M:%S")

# Start comment.txt from scratch with the markdown table header.
header_lines = (
    " ## Pull request artifacts",
    "| file | last change |",
    "| ---- | ------ |",
)
with open(os.getcwd() + '/comment.txt', 'w+') as file:
    file.write("\n".join(header_lines))

# Run the scrape twice: once for the current version, once for the PR version.
for status in gitstatus:
    port = ports[status]
    URL = "http://localhost:" + port  # module-level on purpose: testBridges reads it
    # Grab the rss-bridge main page and parse it with bs4.
    page = requests.get(URL)
    soup = BeautifulSoup(page.content, "html.parser")
    # Every <section> on the main page is handed to testBridges as a bridge,
    # together with the label of the version being tested.
    testBridges(soup.find_all("section"), status)