Purple exclamation mark.svg Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit mediawiki, is welcome.

Red exclamation mark.svg UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before you can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.

Python:Import-it.py

From Botwiki
Jump to: navigation, search
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
This is a script written by Filnik using the import class.


Guide for importers (the rest of the code is well commented ^_-)

I did not feel like programming this in a "super-cool" way, so you will have
to make do a little :P Anyway, if you want to change the page the list is
loaded from, edit the pageLoad = u'...' assignment (search for "pageLoad").
If instead you want to change the regex, search for (precisely) "regex".
Apart from that, just put the list on the selected page (changing the regex,
if needed) and run the bot. Hard? :-) The bot keeps a log in
Utente:Filbot/Log, used later to update a page listing all the imports done.
So please do not comment out those lines!

"""
#
# (C) Filnik, 2007
#
# Greetings:
# Lorenzo Paulatto and Misza13
#
# Distributed under the terms of the MIT license.
#
# Version: 2.5
#
 
import wikipedia, re, time, pagegenerators
from pageimport import *
 
# Global variables
# Wiki the bot works on (language code u'it', family u'wiktionary'); the
# functions below build their wikipedia.Page objects against it.
site = wikipedia.getSite(u'it', u'wiktionary')
 
def generator(raw):
    """Yield one wikipedia.Page (on the global ``site``) per entry in *raw*.

    *raw* is the wikitext of the request list. An entry is a bullet
    ("*", optional space), a one- or two-digit number, two space-separated
    fields and finally a ``[[...]]`` link. Whatever sits between the square
    brackets is used verbatim as the title of the page to load.
    """
    # This is the regex that loads the pages! Modify it if your
    # input page is formatted in another way
    entry_regex = re.compile(
        r"\* ?[0-9]([0-9])? (.*?) .*? \[\[(.*?)\]\]", re.UNICODE)
    for entry in entry_regex.finditer(raw):
        # Only the third group (the link target) is needed.
        _digit, _field, link_target = entry.groups()
        yield wikipedia.Page(site, link_target)
 
# ################################################################ #
def main():
    """Import into wiktionary every page listed on the request page.

    For each page yielded by generator() the bot:

    1. derives the lower-case destination title, dropping any
       parenthesised part;
    2. if either title already exists on ``site``, only makes sure the page
       is in the log (via report()) and moves on;
    3. loads the page of the same name from it.wikipedia and checks that it
       still carries the transfer template;
    4. logs the page, imports it, and tags the wikipedia page for speedy
       deletion;
    5. moves the imported page to the lower-case title, deletes the leftover
       redirect and strips the category links from the moved page.

    The whole run is aborted (the loop is left) when the move or the
    deletion of the redirect explicitly returns False.
    """
    wikipedia.output(u'\t\t\t  >> Start! <<')

    # This is the page where the Bot will check to upload the file.
    pageLoad = u'Wikizionario:Importare/Lista'
    # What follow is used to load the page and get the pages to import
    page = wikipedia.Page(site, pageLoad)
    raw = page.get()
    preloadingGen = pagegenerators.PreloadingGenerator(generator(raw), pageNumber=60)

    # One of these tags must be in the wikipedia page for it to be imported
    transfer_tags = (u'{{trasferimento', u'{{template:trasferimento')
    # How many times a failing move is retried before giving up
    max_move_retries = 3

    for oldpag in preloadingGen:
        pagetoload = oldpag.title()
        newpag = pagetoload.title().lower()
        # Searching if the name of the page has brackets (so the bot will
        # delete them and what is inside)
        if u'(' in newpag:
            newpag = re.sub(r'(.*?) ?\((.*?)\) ?(.*?)', r'\1\3', newpag)
        oldpag2 = wikipedia.Page(site, newpag)

        # Check that the page doesn't exist yet, under either title
        if oldpag.exists():
            wikipedia.output(u"%s is already in wiktionary, checking the log..." % newpag)
            report(oldpag.title(), newpag)
            continue
        if oldpag2.exists():
            wikipedia.output(u"%s is already in wiktionary, checking the log..." % newpag)
            report(oldpag2.title(), newpag)
            continue

        importerbot = Importer(site)  # Initializing the Bot
        # Defining the wikipedia site the pages are imported from
        pediasite = wikipedia.getSite(u'it', u'wikipedia')
        replacepage = wikipedia.Page(pediasite, pagetoload)
        try:
            replacetext = replacepage.get()
        except wikipedia.NoPage:
            # The exception *class* must be named here; the former
            # "wikipedia.NoPage()" instance never matched a raised NoPage.
            wikipedia.output(u"%s e' stata cancellata..." % pagetoload)
            continue

        lowered_text = replacetext.lower()
        has_tag = False
        for tag in transfer_tags:
            if tag in lowered_text:
                has_tag = True
                break
        if not has_tag:
            wikipedia.output(u"%s has no more the trasfer-template! Skip!" % newpag)
            continue

        # Compiling the log...
        if not report(pagetoload, newpag):
            continue  # Already logged: nothing more to do for this page.

        # Import the page, retrying every 8 seconds until the importer
        # reports success.
        imported = False
        while True:
            try:
                wikipedia.output(u'Importing %s...' % pagetoload)
                result = importerbot.Import(pagetoload, prompt=False)
            except KeyError:
                # Give up on this page instead of retrying forever.
                # NOTE: report() above has already logged the page, so later
                # runs will find it in the log and not try it again.
                wikipedia.output(u'Error! The page has a strange character! skip!')
                break
            if result:
                imported = True
                break
            wikipedia.output(u'Sleeping for 8 seconds and retry!')
            time.sleep(8)
        if not imported:
            continue

        # Putting the speedy deletion tag in the wikipedia page.
        # (An earlier version replaced the {{Trasferimento|...}} template
        # with {{TC|wikt|<new title>}} instead of prepending this tag.)
        newtestreplace = u'{{Cancella subito|Pagina trasferita su wiktionary}}\n'
        replacepage.put(newtestreplace + replacetext, u'Bot: Pagina trasferita')

        # Move the imported page to its lower-case title.
        pag = wikipedia.Page(site, pagetoload)
        failed_moves = 0
        while True:
            try:
                result1 = pag.move(newpag, reason=u'Bot: Sposto pagina da Maiuscola a minuscola')
                break
            except wikipedia.PageNotSaved:
                # Page already exist.. yes, it has the same name but in lowercase!
                result1 = True
                break
            except Exception:
                # Any other error: wait and retry a bounded number of times.
                failed_moves += 1
                if failed_moves >= max_move_retries:
                    result1 = False
                    break
                time.sleep(5)
        if result1 is False:
            # Abort the whole run; cleanup is left to the caller of main().
            break
        wikipedia.output(u'Page moved successfully!')

        result2 = pag.delete(u'Bot: Cancello redirect inutile', False)
        if result2 is False:
            break

        # This block isn't already tested and may give errors so, be careful ^_-
        # It may happen that something (i really don't know what) go wrong and
        # the media-wiki software (I have checked, the page deleted hasn't written
        # who has deleted it) delete wrongly not the redirect but the page created.
        # This block will undelete the page and delete the right one.
        # NOTE(review): this loop has no retry limit; it only ends once the
        # moved page can be read. A fresh Page object is built on every pass,
        # presumably so that no cached state is reused - confirm.
        while True:
            try:
                moved_text = wikipedia.Page(site, newpag).get()
                break
            except wikipedia.NoPage:
                wikipedia.output(u'Page Deleted! WARNING! BUG! Trying to solve the problem...')
                wikipedia.Page(site, newpag).undelete(u'Bot: Ripesco pagina cancellata per errore dal software mediawiki.')
                pag.delete(u'Bot: Cancello redirect inutile', False)

        # Deleting the category links that came along from wikipedia...
        putxt = re.sub(r'\[\[[Cc]ategor(ia|y):(.*?)\]\](\n)?', r'', moved_text)
        wikipedia.Page(site, newpag).put(putxt, u'Bot: Tolgo categoria di wikipedia.')
 
def report(pagelog, newpag, com = u'Bot: Aggiungo pagina al log',
           site = wikipedia.getSite(u'it', u'wiktionary'), rep_page = u'Utente:Filbot/Log'):
    """Add *pagelog* to the on-wiki import log unless it is already there.

    Parameters:
        pagelog  -- title of the imported page; also the text looked up in
                    the log to decide whether the page was already logged.
        newpag   -- destination title, written in the new log entry.
        com      -- edit summary used when saving the log page.
        site     -- site holding the log page.
        rep_page -- title of the log page.

    Returns True when a new entry was written, False when *pagelog* already
    occurs somewhere in the log text (nothing is saved in that case).
    """
    another_page = wikipedia.Page(site, rep_page)
    if another_page.exists():
        text_get = another_page.get()
    else:
        # The log page does not exist yet: start it with a short header.
        # NOTE(review): `config` is not imported by name in this file;
        # presumably it comes in through "from pageimport import *" - confirm.
        usr = config.usernames[u'wiktionary']
        text_get = u"This is a report page for the imported pages, please translate me. --[[User:%s|%s]]" % (usr[u'it'], usr[u'it'])

    # Literal substring test. The title used to be compiled as a regular
    # expression, so titles containing characters such as "(", ")", "+" or
    # "?" either failed to match themselves or raised re.error.
    if pagelog in text_get:
        wikipedia.output(u"Already in the log...")
        return False

    # Entry layout: "wikipedia page title", "destination project code",
    # "reason", "destination title",
    rep_text = u'\n\n"%s", "wikt", ".", "%s",' % (pagelog, newpag)
    another_page.put(u'%s%s' % (text_get, rep_text), comment = com, minorEdit = True)
    wikipedia.output(u"...Reported...")
    return True
 
# Run the bot only when the file is executed as a script. The ``finally``
# clause guarantees that wikipedia.stopme() is called whether main()
# returns normally or raises.
if __name__=='__main__':
    try:
        main()
    finally:
        wikipedia.stopme()
Personal tools
Share