
# Python 3.x-weergawe

# -*- coding: utf-8  -*-
"""
Robot om webskakels op die Wayback Machine te argiveer.

Die volgende parameters word ondersteun:

&params;

-dry              Moenie enige veranderinge maak nie, maar wys wat verander sou word.
"""

import re
import time
import json
import urllib
from urllib.request import urlopen
from urllib.parse import urlparse
import ssl
import pywikibot
from pywikibot import pagegenerators
from pywikibot import i18n

# Placeholder substitutions applied to the module docstring when the script
# is run with the parameter -help: '&params;' is replaced by the help text of
# the shared page-generator options.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp,
}

class bcolors:
    """ANSI escape sequences used to colour and style console output.

    Wrap a piece of text between one of the colour/style codes and ``ENDC``
    to apply the effect to that text only.
    """

    # Foreground colours.
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"

    # Reset: switches every colour and attribute off again.
    ENDC = "\033[0m"

    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

class BasicBot:
    def __init__(self, generator, dry):
        """
        Set up the bot.

        @param generator: The page generator that determines on which pages
                          to work.
        @type generator: generator.
        @param dry: If True, doesn't do any real changes, but only shows
                    what would have been changed.
        @type dry: boolean.
        """
        self.generator = generator
        self.dry = dry

        # NOTE(security): this replaces the default HTTPS context factory of
        # the ssl module, which switches TLS certificate verification off for
        # every urllib HTTPS request made by this process. Kept because the
        # bot has to probe arbitrary third-party links, many of which have
        # broken certificates; do not copy this into code that handles
        # sensitive data.
        if hasattr(ssl, '_create_unverified_context'):
            ssl._create_default_https_context = ssl._create_unverified_context

        # Set the edit summary message
        site = pywikibot.Site()
        self.summary = i18n.twtranslate(site, 'basic-changing')

    def run(self):
        """ Verwerk elke bladsy van die generator. """
        for page in self.generator:

    def treat(self, page):
        """ Laai die bladsy, wysig en stoor dit. """
        text = self.load(page)
        if not text:

        # NOTA: Here you can modify the text in whatever way you want. #

        t = page.title(asLink=False);
        pywikibot.output('Verwerk bladsy %s%s%s...' % (bcolors.BOLD, t, bcolors.ENDC))
        talktext = '';

        it = re.findall(r'(https?://[^\s|<>{}\[\]]+)', text)
        it = list(set(it))	# Remove dups
        for wurl in it:
            if "" in wurl: continue
            url = urllib.parse.quote(wurl.encode('utf8'), ':/')
            ia = urlopen('' % url).read().decode('utf8')
            data = json.loads(ia)
#	{"closest":
#		{"available":true,
#		 "url":"",
#		 "timestamp":"20150223120334",
#                "status":"200"
#                }
#         }

            if '"available":true' in ia:
                # Page archived, check to see if original URL is still OK.
                aurl  = data["archived_snapshots"]["closest"]["url"]
                atime = data["archived_snapshots"]["closest"]["timestamp"]
                pywikibot.output('%s... URL %s is reeds geargiveer%s' % (bcolors.OKBLUE, wurl, bcolors.ENDC))
                    ia = urlopen(wurl)
                    rc = ia.getcode()
                    if rc == 200:
                        pywikibot.output('%s... ... URL is steeds OK%s' % (bcolors.OKBLUE, bcolors.ENDC))
                    elif rc == 404:
                        pywikibot.output('%s... ... 404 verander na %s%s' % (bcolors.FAIL, aurl, bcolors.ENDC))
                        pywikibot.output('%s... ... {{Wayback|url=%s|date=%s}}%s' % (bcolors.FAIL, url, atime, bcolors.ENDC))
                        talktext += "|-\n| %s || {{Wayback|url=%s|date=%s}}\n" % (wurl, url, atime)
                        pywikibot.output('%s... ... HTTP %s-fout%s' % (bcolors.OKBLUE, rc, bcolors.ENDC))
                except IOError as e:
                    pywikibot.output('%s... ... URL het aandag nodig %s%s' % (bcolors.FAIL, e, bcolors.ENDC))
                    # pywikibot.output('%s... ... verander na %s%s' % (bcolors.FAIL, aurl, bcolors.ENDC))
                    # pywikibot.output('%s... ... {{Wayback|url=%s|date=%s}}%s' % (bcolors.FAIL, url, atime, bcolors.ENDC))
                    #if str(e) == 'HTTP Error 404: Not Found':
                    #   talktext += "|-\n| %s || {{Wayback|url=%s|date=%s}}\n" % (wurl, url, atime)
                    #   print(talktext)
                    ia = urlopen('' % url).read()
                except IOError as e:
                    pywikibot.output('%s... URL %s gee %s%s' % (bcolors.FAIL, url, e, bcolors.ENDC))
                    ia = '';
            if 'FILE ARCHIVED ON' in str(ia):
                pywikibot.output('%s... URL %s suksesvol opgelaai%s' % (bcolors.OKGREEN, wurl, bcolors.ENDC))
            elif '403 Forbidden' in str(ia):
                pywikibot.output('... URL %s is geblokkeer op' % wurl)
            elif '404: Not Found' in str(ia):
                pywikibot.output('... URL %s is dood' % wurl)
            elif 'due to robots.txt' in str(ia):
                pywikibot.output('... URL %s is geblokkeer deur robots.txt' % wurl)
            elif 'URL has been excluded from the Wayback Machine' in str(ia):
                pywikibot.output('... URL %s is deur Wayback Machine utgesluit' % wurl)
            elif 'look like an valid URL' in str(ia):
                pywikibot.output('%s... URL %s lys soos ''n ongeldige URL%s' % (bcolors.WARNING, wurl, bcolors.ENDC))
            elif 'url is not available on the live web' in str(ia):
                pywikibot.output('... URL %s is nie beskikbaar nie' % wurl)
                #ia = str(ia)#, errors='ignore')
                pywikibot.output('... URL %s het gefaal: %s' % (wurl, ia))

        if talktext:
           talktext = "== Geargiveerde skakels ==\n{| class=\"wikitable\"\n|-\n! Dooie skakel !! Argief\n" + talktext + "|}"
           talkpage = page.toggleTalkPage()
           if talkpage.exists():
              talktext_prev = talkpage.get()
              if talktext == talktext_prev:
                 pywikibot.output("Reeds gestoor - slaan oor...\n")
                 talktext_prev = re.sub(r'== Geargiveerde skakels ==.*\|\}', '', talktext_prev, flags=re.MULTILINE|re.DOTALL)
                 if talktext_prev != '':
                    talktext_prev += "\n\n"
                 talktext_prev += talktext
                 pywikibot.output('Nuwe blad: [' + talktext_prev + ']')
                 talkpage.put(talktext_prev, 'Rapporteer dooie skakels wat geargiveer is');
              pywikibot.output('Skep bespreking: [' + talktext + ']')
              talkpage.put(talktext, 'Rapporteer dooie skakels wat geargiveer is');
        # if not, page, self.summary):
        #    pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))

    def load(self, page):
        """ Laai die teks van 'n gegewe bladsy. """
            # Load the page
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Bladsy %s bestaan nie, slaan oor."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Bladsy %s is 'n aanstuur; slaan oor."
                             % page.title(asLink=True))
            return text
        return None

    def save(self, text, page, comment=None, minorEdit=True,
        """ Opdateer 'n gegewe bladsy met muwe teks. """
        # only save if something was changed
        if text != page.get():
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            # show what was changed
            pywikibot.showDiff(page.get(), text)
            pywikibot.output(u'Comment: %s' % comment)
            if not self.dry:
                if pywikibot.input_yn(
                        u'Do you want to accept these changes?',
                        default=False, automatic_quit=False):
                        page.text = text
                        # Save the page
               or self.comment,
                                  minor=minorEdit, botflag=botflag)
                    except pywikibot.LockedPage:
                        pywikibot.output(u"Bladsy %s is gesluit; slaan oor."
                                         % page.title(asLink=True))
                    except pywikibot.EditConflict:
                            u'Slaan %s as gevolg van ''n wysigingskonflik oor'
                            % (page.title()))
                    except pywikibot.SpamfilterError as error:
                            u'Kan nie %s wysig nie, swattlys-inskrywing %s'
                            % (page.title(), error.url))
                        return True
        return False

def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # If dry is True, doesn't do any real changes, but only show
    # what would have been changed.
    dry = False

    # Parse command line arguments
    for arg in local_args:
        if arg.startswith("-dry"):
            dry = True
        else:
            # Everything else is a page-selection option for the factory.
            genFactory.handleArg(arg)

    # The generator gives the pages that should be worked upon.
    gen = genFactory.getCombinedGenerator()
    if gen:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.
        gen = pagegenerators.PreloadingGenerator(gen)
        bot = BasicBot(gen, dry)
        bot.run()
    else:
        # No pages were selected: show the usage text instead.
        pywikibot.showHelp()

if __name__ == "__main__":