Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit MediaWiki sites, is welcome.
UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before they can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.
Python:Import-it.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This is a script written by Filnik using the import class.

Guide for importers (translated from the original Italian notes):

Nothing fancy here, so you will have to make do a little. To change the
page the import list is loaded from, edit the ``pageLoad = '...'``
assignment in main() (search for ``pageLoad = '``). To change how the list
is parsed, search for ``regex``. Otherwise, just put the list on the
selected page (adjusting the regex if needed) and run the bot.

The bot keeps a log in Utente:Filbot/Log, which is later used to update a
page listing all the imports done, so please do not comment those lines
out.
"""
#
# (C) Filnik, 2007
#
# Greetings:
# Lorenzo Paulatto and Misza13
#
# Distributed under the terms of the MIT license.
#
# Version: 2.5
#

import re
import time

import pagegenerators
import wikipedia
# NOTE(review): report() reads ``config.usernames`` but ``config`` is never
# imported explicitly; presumably it arrives through this star import --
# verify against pageimport before replacing it with explicit names.
from pageimport import *

# Global variables
site = wikipedia.getSite(u'it', u'wiktionary')

# Lower-cased markers; one of them must appear in the Wikipedia page text
# for the page to be imported.
TRANSFER_TAGS = (u'{{trasferimento', u'{{template:trasferimento')

# How many times a failing move is retried before giving up.
MAX_MOVE_RETRIES = 3


def generator(raw):
    """Yield a wiktionary Page for every entry found in the list text.

    raw -- wikitext of the list page. Each entry is expected to look like
           ``* <1-2 digits> <word> <anything> [[Title]]``; the title inside
           the double brackets (third regex group) is the page to load.
    """
    # This is the regex that loads the pages! Modify it if your
    # input page is formatted in another way
    regex = re.compile(r"\* ?[0-9]([0-9])? (.*?) .*? \[\[(.*?)\]\]", re.UNICODE)
    for m in regex.finditer(raw):
        pagetoload = m.group(3)
        yield wikipedia.Page(site, pagetoload)


def _has_transfer_tag(text):
    """Return True if text contains one of TRANSFER_TAGS (case-insensitive)."""
    lowered = text.lower()
    for tag in TRANSFER_TAGS:
        if tag in lowered:
            return True
    return False


def _import_page(importerbot, pagetoload):
    """Import pagetoload, retrying every 8 seconds until it succeeds.

    Returns True once the importer reports success, or False when the
    importer raises KeyError. The original code retried forever on
    KeyError even though its message says "skip!"; the page is now
    actually skipped.
    """
    domandami = False  # passed as ``prompt``: never ask for confirmation
    while True:
        wikipedia.output(u'Importing %s...' % pagetoload)
        try:
            result = importerbot.Import(pagetoload, prompt=domandami)
        except KeyError:
            wikipedia.output(u'Error! The page has a strange character! skip!')
            return False
        if result:
            return True
        wikipedia.output(u'Sleeping for 8 seconds and retry!')
        time.sleep(8)


def _move_to_lowercase(pag, newpag):
    """Move pag to newpag and return the move result.

    PageNotSaved is treated as success, as in the original code (the
    target already exists under the lower-case name). Any other error is
    retried after 5 seconds, at most MAX_MOVE_RETRIES times, after which
    False is returned. The original never incremented its retry counter,
    so it looped forever.
    """
    failures = 0
    while True:
        try:
            return pag.move(newpag, reason=u'Bot: Sposto pagina da Maiuscola a minuscola')
        except wikipedia.PageNotSaved:
            # Page already exist.. yes, it has the same name but in lowercase!
            return True
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C still
            # interrupts the bot.
            if failures == MAX_MOVE_RETRIES:
                return False
            failures += 1
            time.sleep(5)


def main():
    """Import every page listed on the wiktionary request page.

    For each listed title that does not yet exist on wiktionary (neither
    as listed nor lower-cased with any parenthesised part removed), the
    bot checks that the Wikipedia page still carries the transfer
    template, logs the import, imports the page, tags the Wikipedia page
    for speedy deletion, moves the imported page to its lower-case title,
    deletes the leftover redirect and strips the category links.
    """
    wikipedia.output(u'\t\t\t >> Start! <<')
    # This is the page where the Bot will check to upload the file.
    pageLoad = u'Wikizionario:Importare/Lista'
    # What follow is used to load the page and get the pages to import
    page = wikipedia.Page(site, pageLoad)
    raw = page.get()
    preloadingGen = pagegenerators.PreloadingGenerator(generator(raw), pageNumber=60)
    for oldpag in preloadingGen:
        pagetoload = oldpag.title()
        newpag = pagetoload.title().lower()
        # Searching if the name of the page has brackets (so the bot will
        # delete them and what is inside)
        if u'(' in newpag:
            newpag = re.sub(r'(.*?) ?\((.*?)\) ?(.*?)', r'\1\3', newpag)
        oldpag2 = wikipedia.Page(site, newpag)

        # Check that the Page doesn't still exist
        if oldpag.exists():
            wikipedia.output(u"%s is already in wiktionary, checking the log..." % newpag)
            report(oldpag.title(), newpag)
            continue
        if oldpag2.exists():
            wikipedia.output(u"%s is already in wiktionary, checking the log..." % newpag)
            report(oldpag2.title(), newpag)
            continue

        importerbot = Importer(site)  # Initializing the Bot
        # defining the wikipedia site from which the pages are imported
        pediasite = wikipedia.getSite(u'it', u'wikipedia')
        replacepage = wikipedia.Page(pediasite, pagetoload)
        try:
            replacetext = replacepage.get()
        except wikipedia.NoPage:
            # Was ``except wikipedia.NoPage():`` -- an instance, not the class.
            wikipedia.output(u"%s e' stata cancellata..." % pagetoload)
            continue

        # The transfer tag must be in the page for it to be imported
        if not _has_transfer_tag(replacetext):
            wikipedia.output(u"%s has no more the trasfer-template! Skip!" % newpag)
            continue

        # Compiling the log... If the page is already logged, skip it.
        if not report(pagetoload, newpag):
            continue

        if not _import_page(importerbot, pagetoload):
            continue

        # Putting the speedy deletion tag in the wikipedia page
        newtestreplace = u'{{Cancella subito|Pagina trasferita su wiktionary}}\n'
        replacepage.put(newtestreplace + replacetext, u'Bot: Pagina trasferita')

        pag = wikipedia.Page(site, pagetoload)
        result1 = _move_to_lowercase(pag, newpag)
        # Explicit comparison kept from the original: only a literal False
        # result stops the bot. stopme() is not called here because the
        # ``finally`` clause at the bottom of the file runs it on every exit.
        if result1 == False:
            break
        wikipedia.output(u'Page moved successfully!')
        # NOTE(review): the second argument is presumably a "prompt" flag
        # -- confirm against the Page.delete signature.
        result2 = pag.delete(u'Bot: Cancello redirect inutile', False)
        if result2 == False:
            break

        # This block isn't already tested and may give errors so, be careful ^_-
        # It may happen that something (i really don't know what) go wrong and
        # the media-wiki software (I have checked, the page deleted hasn't written
        # who has deleted it) delete wrongly not the redirect but the page created.
        # This block will undelete the page and delete the right one.
        while True:
            try:
                netxt = wikipedia.Page(site, newpag).get()
                break
            except wikipedia.NoPage:
                wikipedia.output(u'Page Deleted! WARNING! BUG! Trying to solve the problem...')
                wikipedia.Page(site, newpag).undelete(u'Bot: Ripesco pagina cancellata per errore dal software mediawiki.')
                pag.delete(u'Bot: Cancello redirect inutile', False)

        # Deleting the empty category that are in the wiktionary page...
        putxt = re.sub(r'\[\[[Cc]ategor(ia|y):(.*?)\]\](\n)?', r'', netxt)
        wikipedia.Page(site, newpag).put(putxt, u'Bot: Tolgo categoria di wikipedia.')


def report(pagelog, newpag, com = u'Bot: Aggiungo pagina al log',
           site = wikipedia.getSite(u'it', u'wiktionary'),
           rep_page = u'Utente:Filbot/Log'):
    """Append an import entry for pagelog to the log page, unless present.

    pagelog  -- title of the source page; also the text searched for in
                the log to decide whether it was already reported.
    newpag   -- destination title written into the log entry.
    com      -- edit summary used when saving the log page.
    site     -- site holding the log page.
    rep_page -- title of the log page.

    Returns True if a new entry was written, False if pagelog already
    appears in the log text.
    """
    another_page = wikipedia.Page(site, rep_page)
    if another_page.exists():
        text_get = another_page.get()
    else:
        usr = config.usernames[u'wiktionary']
        text_get = u"This is a report page for the imported pages, please translate me. --[[User:%s|%s]]" % (usr[u'it'], usr[u'it'])
    # Plain substring test. The original compiled the raw title as a regular
    # expression, so titles containing regex metacharacters such as
    # parentheses were mis-matched or raised re.error.
    if pagelog in text_get:
        wikipedia.output(u"Already in the log...")
        return False
    # Log entry columns (translated): "Wikipedia page title",
    # "destination project code", "reason", "destination title",
    rep_text = u'\n\n"%s", "wikt", ".", "%s",' % (pagelog, newpag)
    another_page.put(u'%s%s' % (text_get, rep_text), comment = com, minorEdit = True)
    wikipedia.output(u"...Reported...")
    return True


if __name__=='__main__':
    try:
        main()
    finally:
        wikipedia.stopme()