#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This is a script written by Filnik using the import class.
Guide for Italian importers (for everyone else, the code is well commented ^_-)
I don't feel much like programming in a "super-cool" way, so you will have to
manage a bit by yourselves :P Anyway, if you want to change the page the list is
loaded from, edit pageLoad = '' with whatever you want (drop the last ' and search
for that); if instead you want to change the regex, search for (precisely) regex.
For the rest, just put the list in the selected page (changing the regex, if needed)
and give it the ok. Difficult? :-) The bot keeps a log in Utente:Filbot/Log, which is
later used to update a page with all the imports done. So please do not comment out
those lines!
"""
#
# (C) Filnik, 2007
#
# Greetings:
# Lorenzo Paulatto and Misza13
#
# Distributed under the terms of the MIT license.
#
# Version: 2.5
#
import wikipedia, re, time, pagegenerators
from pageimport import *
# Global variables
# Wiki selected with language code 'it' and family 'wiktionary'. generator()
# builds its Page objects on it, and main() uses it both for the list page and
# as the destination of the imports.
site = wikipedia.getSite(u'it', u'wiktionary')
def generator(raw):
    """Yield a Page on ``site`` for every list entry found in ``raw``.

    ``raw`` is the wikitext of the list page.  An entry is a bullet followed
    by a one- or two-digit number, some text and a ``[[wiki link]]``; the
    link target (third regex group) is the title of the page to load.
    """
    # This is the regex that loads the pages! Modify it if your input page
    # is formatted in another way (for a plain list of links, a pattern that
    # captures only the [[...]] target in group 3 would be enough).
    entry_pattern = re.compile(r"\* ?[0-9]([0-9])? (.*?) .*? \[\[(.*?)\]\]", re.UNICODE)
    titles = (found.group(3) for found in entry_pattern.finditer(raw))
    for title in titles:
        yield wikipedia.Page(site, title)
# ################################################################ #
def _import_from_wikipedia(importerbot, pagetoload):
    """Import ``pagetoload`` through ``importerbot``, retrying until it works.

    Returns True once ``Import`` reports success.  Returns False when the
    import raises KeyError, in which case the caller must skip the page.
    """
    while True:
        wikipedia.output(u'Importing %s...' % pagetoload)
        try:
            result = importerbot.Import(pagetoload, prompt = False)
        except KeyError:
            # The message promises a skip, so give up on this page instead of
            # retrying the very same title forever.
            wikipedia.output(u'Error! The page has a strange character! skip!')
            return False
        if result:
            return True
        wikipedia.output(u'Sleeping for 8 seconds and retry!')
        time.sleep(8)


def _move_to_lowercase(pag, newpag):
    """Move ``pag`` to ``newpag`` and return the outcome of the move.

    Returns whatever ``pag.move`` returns, True when the move raises
    PageNotSaved, and False when the move still fails after three retries
    (5 seconds apart).
    """
    failures = 0
    while True:
        try:
            return pag.move(newpag, reason = u'Bot: Sposto pagina da Maiuscola a minuscola')
        except wikipedia.PageNotSaved:
            # Page already exist.. yes, it has the same name but in lowercase!
            return True
        except Exception:
            # Bounded retry: count the failures so the loop always terminates.
            if failures == 3:
                return False
            failures += 1
            time.sleep(5)


def main():
    """Import into ``site`` the pages listed on the wiki list page.

    The list is read from ``Wikizionario:Importare/Lista`` on ``site``.  For
    every title extracted by ``generator()`` the bot:

    1. skips the title (after passing it to ``report()``) when the page, or
       its lower-case bracket-free variant, already exists on ``site``;
    2. skips it when the it.wikipedia page is missing or no longer contains
       the transfer template;
    3. logs the import with ``report()``, imports the page and prepends the
       speedy-deletion template to the it.wikipedia page;
    4. moves the imported page to its lower-case title, deletes the leftover
       redirect and removes the category links from the new page.

    The loop stops at the first page whose move or redirect deletion returns
    False.
    """
    wikipedia.output(u'\t\t\t >> Start! <<')
    # This is the page where the Bot will check to upload the file.
    pageLoad = u'Wikizionario:Importare/Lista'
    # What follow is used to load the page and get the pages to import
    raw = wikipedia.Page(site, pageLoad).get()
    # This is the tag the must be in the page to be imported
    tag = [u'{{trasferimento', u'{{template:trasferimento']
    preloadingGen = pagegenerators.PreloadingGenerator(generator(raw), pageNumber=60)
    for oldpag in preloadingGen:
        pagetoload = oldpag.title()
        newpag = pagetoload.title().lower()
        # Searching if the name of the page has brackets (so the bot will
        # delete them and what is inside)
        if u'(' in newpag:
            newpag = re.sub(r'(.*?) ?\((.*?)\) ?(.*?)', r'\1\3', newpag)
        oldpag2 = wikipedia.Page(site, newpag)

        # Check that neither the original nor the lower-case title exists yet.
        if oldpag.exists():
            wikipedia.output(u"%s is already in wiktionary, checking the log..." % newpag)
            report(oldpag.title(), newpag)
            continue
        if oldpag2.exists():
            wikipedia.output(u"%s is already in wiktionary, checking the log..." % newpag)
            report(oldpag2.title(), newpag)
            continue

        importerbot = Importer(site) # Initializing the Bot
        # Defining the wikipedia site the pages are imported from
        pediasite = wikipedia.getSite(u'it', u'wikipedia')
        replacepage = wikipedia.Page(pediasite, pagetoload)
        try:
            replacetext = replacepage.get()
        except wikipedia.NoPage:
            # The except clause names the class itself: an instance here
            # would never match the raised exception.
            wikipedia.output(u"%s e' stata cancellata..." % pagetoload)
            continue

        lowered = replacetext.lower()
        ok = False
        for w in tag:
            if w in lowered:
                ok = True
                break
        if not ok:
            wikipedia.output(u"%s has no more the trasfer-template! Skip!" % newpag)
            continue

        # Compiling the log...
        if not report(pagetoload, newpag):
            continue # If the result is false, continue.
        if not _import_from_wikipedia(importerbot, pagetoload):
            continue

        # Putting the speedy deletion tag in the wikipedia page
        newtestreplace = u'{{Cancella subito|Pagina trasferita su wiktionary}}\n'
        replacepage.put(newtestreplace + replacetext, u'Bot: Pagina trasferita')

        pag = wikipedia.Page(site, pagetoload)
        # Only an explicit False counts as failure, as in the original check.
        if _move_to_lowercase(pag, newpag) == False:
            break
        wikipedia.output(u'Page moved successfully!')
        if pag.delete(u'Bot: Cancello redirect inutile', False) == False:
            break

        # This block isn't already tested and may give errors so, be careful ^_-
        # It may happen that something (i really don't know what) go wrong and
        # the media-wiki software (I have checked, the page deleted hasn't written
        # who has deleted it) delete wrongly not the redirect but the page created.
        # This block will undelete the page and delete the right one.
        while True:
            try:
                netxt = wikipedia.Page(site, newpag).get()
                break
            except wikipedia.NoPage:
                wikipedia.output(u'Page Deleted! WARNING! BUG! Trying to solve the problem...')
                wikipedia.Page(site, newpag).undelete(u'Bot: Ripesco pagina cancellata per errore dal software mediawiki.')
                pag.delete(u'Bot: Cancello redirect inutile', False)

        # Deleting the empty category that are in the wiktionary page...
        putxt = re.sub(r'\[\[[Cc]ategor(ia|y):(.*?)\]\](\n)?', r'', netxt)
        wikipedia.Page(site, newpag).put(putxt, u'Bot: Tolgo categoria di wikipedia.')
def report(pagelog, newpag, com = u'Bot: Aggiungo pagina al log',
           site = wikipedia.getSite(u'it', u'wiktionary'), rep_page = u'Utente:Filbot/Log'):
    """Record an imported page on the wiki log page, once.

    Parameters:
        pagelog  -- title of the source page; also the text looked for in the
                    log to decide whether the page was already recorded.
        newpag   -- destination title written in the log entry.
        com      -- edit summary used when saving the log page.
        site     -- site that hosts the log page.
        rep_page -- title of the log page.

    Returns True when a new entry has been appended to the log, False when
    ``pagelog`` already occurs in it (nothing is saved in that case).
    """
    another_page = wikipedia.Page(site, rep_page)
    if another_page.exists():
        text_get = another_page.get()
    else:
        # NOTE(review): `config` is not imported explicitly in this file;
        # presumably it arrives through `from pageimport import *` - confirm.
        usr = config.usernames[u'wiktionary']
        text_get = u"This is a report page for the imported pages, please translate me. --[[User:%s|%s]]" % (usr[u'it'], usr[u'it'])
    # Literal containment test: the title must not be interpreted as a regular
    # expression, otherwise titles with brackets, '+', '?', '*' ... either
    # raise re.error or never match their own log entry.
    if pagelog in text_get:
        wikipedia.output(u"Already in the log...")
        return False
    # Entry layout: "wikipedia page title", "destination project code",
    # "reason", "destination title",
    rep_text = u'\n\n"%s", "wikt", ".", "%s",' % (pagelog, newpag)
    another_page.put(u'%s%s' % (text_get, rep_text), comment = com, minorEdit = True)
    wikipedia.output(u"...Reported...")
    return True
if __name__ == "__main__":
    # Script entry point: run the bot and always call wikipedia.stopme()
    # afterwards, whether main() returns normally or raises.
    try:
        main()
    finally:
        wikipedia.stopme()