# TO BE TESTED FOR BUGS
# FORMERLY CALLED "THE PLAN OF [[Python:nukeAll.py]]"
# REVERT ALL EDITS OF A USER
# PAGE MOVES ET AL.
# AND DELETING THE CRAP
# OR SUBMITTING FOR DELETION - DEPENDING ON THE SET-UP IN "# IF YOU ARE NOT A SYSOP," ....BLABLABLA
# POSSIBLE GENERALISATIONS: 1. TO ALLOW FOR A GROUP OF VANDALS,
# - SIMPLY TWEAK THE FINAL FOR-LOOP
# 2. TRANS-WIKI CLEAN-UP?
# - I NEED TO LEARN TO USE FUNCTION handleargs() ! AAAARRRRRRGGGGGGG
# 3. BETTER JUDGEMENT OF PAGEMOVE:
# USE THE FACT THAT A PAGEMOVE DOESN'T CHANGE THE TEXT
# 4. DELETE ALSO THE PAGES WHICH HAVE {{DELETE...}} STUFF
# APOLOGIES FOR INELEGANCE
### SET-UP : THE PARAMETERS , THE MESSAGES, ETC
# Defaults for the run.  Each prompt below shows the current default in
# parentheses; pressing RETURN keeps it, typing anything replaces it.
siteName = 'zh-yue.wikipedia.org'
# If your command line doesn't support Unicode, set the vandal's name by hand
# here and just press RETURN when the "Name of vandal" prompt appears.
vandalName = 'Hillgentleman1'
timestamp = ''  # may be set here instead of by manual input; not yet used in this draft
uclimit = 500  # could be 5000 for a robot account; not yet used in this draft
sandboxName = 'Wikipedia:Sandbox'
pagemoveWord = ' moved to '  # the surrounding spaces are significant

# Sysop mode deletes pages, so it is only enabled by an explicit answer that
# starts with "y".  (Previously any non-empty answer, even "n", enabled it.)
answer = raw_input('Are you a sysop? - (y)es, default = False')
IamSysop = answer.strip().lower().startswith('y')

answer = raw_input('Name of vandal, UPPER CASE FIRST? (' + vandalName + ')')
if answer != '':
    vandalName = answer

## Timestamp prompt, commented out while not needed:
##a = raw_input('timestamp?')
##if a!= '': timestamp = a

answer = raw_input('siteName?(' + siteName + ')')
if answer != '':
    siteName = answer

answer = raw_input('sandboxName, UPPER CASE FIRST?(' + sandboxName + ')')
if answer != '':
    sandboxName = answer

# The answer is deliberately not stripped: leading/trailing spaces are part of
# the page-move phrase.
answer = raw_input('page move word?("' + pagemoveWord + '")')
if answer != '':
    pagemoveWord = answer
### SUGGESTION: SET THE DEFAULT TIME LIMIT TO 2 HOURS,
### I.E. REVERTS ALL EDITS FROM AN IP-ADDRESS OR A USER
### TO WITHIN 2 HOURS (OR PERHAPS ONE DAY?)
### OPTIONAL PARAMETER: REVERT ALL THOSE AFTER A CERTAIN TIMESTAMP
### OR REVISION ID
### IMPORTING THE MODULES; CREATING THE SITE OBJECT
import wikipedia
import re
import userContributions
# NOTE(review): getSite() is called without arguments, so siteName is only
# passed to userContributions below, not to the site object - confirm that the
# framework's configured default site is the wiki named by siteName.
site = wikipedia.getSite()
print(site.encoding())  # testing

# userContributions.py is not using the variable timestamp at present.
# NOTE: the name `list` shadows the builtin; it is kept because the main loop
# further down reads the contributions from this name.
list = userContributions.userContributions(siteName, vandalName, timestamp)

### REGEX SETUP
## Detecting page moves; may need set-up for the individual wiki from
## [[mediawiki:1movedto2]].
# pagemoveWord is free-form user input, so it is escaped before being spliced
# into the patterns; otherwise a character such as "(" or "*" would change the
# pattern's meaning or make the look-behind invalid.
_moveWordRe = re.escape(pagemoveWord)
# Title between "[[" and "]]" that is immediately followed by the move phrase.
movedfromX = re.compile(r'(?<=\[\[).*?(?=\]\]' + _moveWordRe + r')', flags=re.U)
# Title between "[[" and "]]" that immediately follows the move phrase.
movedtoX = re.compile(r'(?<=' + _moveWordRe + r'\[\[).*?(?=\]\])', flags=re.U)
### DEFINE THE FUNCTIONS
## commentSaysMoved
""" STUBBING UNTIL WE HAVE SOMETHING BETTER - SEE DISCUSSION ON [[META:META:BABEL]] """
def commentSaysMoved(comment='', fromPattern=None, toPattern=None):
    """Decide from an edit summary whether the edit was a page move.

    Parameters:
        comment     -- the edit summary to inspect; '' or None means "no summary".
        fromPattern -- compiled regex whose matches are the source title;
                       defaults to the module-level movedfromX.
        toPattern   -- compiled regex whose matches are the target title;
                       defaults to the module-level movedtoX.

    Returns a tuple (saysSo, fromPage, toPage).  saysSo is True only when BOTH
    patterns match, so a caller that gets True always has two non-empty-pattern
    matches to work with; otherwise the result is (False, '', '').  When a
    pattern matches several times, the last match is used.
    """
    if fromPattern is None:
        fromPattern = movedfromX
    if toPattern is None:
        toPattern = movedtoX

    # A missing summary cannot describe a move (and findall(None) would raise).
    if not comment:
        return False, '', ''

    sources = fromPattern.findall(comment)
    targets = toPattern.findall(comment)
    # A half match would hand the caller an empty title to move to or from.
    if not sources or not targets:
        return False, '', ''
    return True, sources[-1], targets[-1]
##### NOTE: WE ARE ASSUMING THAT THERE IS ONLY ONE VANDAL
##### MULTI-VANDALS WOULD CREATE EDIT-CONFLICTS AMONGST THEMSELVES, WHICH ARE COSTLY TO THEM
### "SECOND" (ACTUALLY FIRST) STEP: REVERT ALL EDITS - SOMEWHAT TESTED
### "THIRD" (ACTUALLY SECOND, BUT DONE TOGETHER WITH THE ABOVE) STEP: DELETING THE CRAP - KIND OF TESTED
### "FIRST" (ACTUALLY LAST) STEP: -REVERT ALL PAGEMOVES - SOMEWHAT TESTED
### -POTENTIAL IMPROVEMENT: MOVE-AND-DELETE, NOT "SIMPLY MOVE"
""" SHOULD WE USE getVersionHistory or fullVersionHistory?
I ONLY KNOW HOW TO GET THE WIKITEXT WITH fullVersionHistory
BUT IT RETURNS ALL EDITS AND IS SLOW."""
#PERHAPS THE ORDER SHOULD BE REVERSED,
#BUT I DON'T KNOW WHICH WAY THE fullVersionHistory FUNCTION GOES
# Walk through every contribution of the vandal.  For each touched page:
#   * if somebody else edited it before, restore that editor's latest text;
#   * if the vandal is the only editor, delete it (sysop) or tag it for deletion;
#   * if the edit summary describes a page move, queue the reverse move.
# The queued moves are collected as (current title, original title) pairs.
moveList = []
# The loop variables avoid the names `type` and `timestamp` so that neither the
# builtin nor the timestamp setting is overwritten by the loop.
for editType, pageid, revid, ns, title, editTimestamp, minor, comment in list:
    # The next line is temporarily commented out to avoid Unicode problems:
    # if title == sandboxName: continue   # do not touch the sandbox
    # More robust would be to use the page id of the sandbox.
    page = wikipedia.Page(site, title.encode(site.encoding()))
    # fullVersionHistory() returns (edit date/time, user name, content) tuples.
    edits = page.fullVersionHistory()
    edits.reverse()  # reverse the historical order; can be checked with fullVersionHistoryTest.py
    print('*** TITLE: %s' % title)

    # Scan the reversed history until the first revision that is not the
    # vandal's.  If there is none, `content` ends up holding the text of the
    # last revision visited.
    lastGood = None
    content = ''
    for editTime, userName, content in edits:
        print('*** TIMESTAMP, USERNAME, CONTENT: %s %s %s \n' % (editTime, userName, content[0:50]))
        if userName != vandalName:
            lastGood = (editTime, userName, content)
            break

    if not edits:
        # Without any revision there is nothing to revert or to judge; the old
        # code would have reused the values left over from the previous page.
        print('\n*NO REVISIONS FOUND, NOTHING DONE TO [[' + title + ']]')
    elif lastGood is not None:
        ### vandalName is not the only editor of the page
        goodTime, goodUser, goodText = lastGood
        if edits[0][1] != vandalName:
            # The first revision visited is already somebody else's, so saving
            # it again would only re-submit the same text.
            print('*nothing to revert on [[' + title + ']]: the latest edit is by ' + goodUser)
        else:
            page.put(goodText, 'reverting [[' + title + ']] to the edit at ' + goodTime + u' by ' + goodUser)
            print('*reverting [[' + title + ']] to the edit at time=' + goodTime + ' by ' + goodUser)
    elif IamSysop:
        ### vandalName is the only editor of the page, and we may delete
        # prompt=False: delete without asking; set it to True if you want to confirm.
        page.delete('bot-deletion (see [[botwiki:python:revertAll.py]])of spam or vandalism: the content starts with:' + content[0:20], False)
        print('\n\n*DELETED THE PAGE  %s  BY [[user: %s ]]' % (title, vandalName))
    elif page.isRedirectPage():
        # Temporary measure: don't touch the redirect, or else a non-sysop
        # can't move the page back over it (only a sysop can delete-and-move).
        # Unlike a bare `continue`, this still lets the page-move detection
        # below see this contribution.
        print('\n*Is a redirect!\n')
    else:
        ### vandalName is the only editor of the page, and we cannot delete
        page.put('{{delete|vandalism}}', 'submit for deletion: the only contributor was ' + vandalName)
        print('\n\n*SUBMIT FOR DELETION: the only contributor was  %s' % vandalName)
        print('\n* WHOSE CONTENT STARTS WITH:')
        print(content[0:200])
        print('\n* AND WHOSE TITLE IS:')
        print(title)
    print('--------------------------------------------------------------------------------------------\n')

    ### Detect page moves: the simplest mechanism for now, based only on the
    ### edit summary.  Moves are flagged here and carried out at the end.
    moved, a, b = commentSaysMoved(comment)  # boolean, "moved from" title, "moved to" title
    # Queue each reverse move once and only when both titles are known, so the
    # later move step never gets an empty title or a repeated move.
    if moved and a and b and (b, a) not in moveList:
        moveList.append((b, a))  # move back from "moved to" to "moved from"
        print('\n*PAGE MOVE DETECTED: FROM [[' + a + ']] TO [[' + b + ']]')
### MOVING THE PAGES BACK:
### IF THERE WERE A SEQUENCE OF PAGE MOVES, WE MOVE IT BACK IN A SEQUENCE
# Undo the queued page moves.  Each entry is (current title, original title);
# a sequence of page moves is undone in the order in which it was queued.
# NOTE: this try/finally only covers the move step; stopme() is still not
# reached if an earlier part of the script raises.
try:
    for movedTitle, originalTitle in moveList:
        # An entry with a missing title cannot be moved anywhere sensible.
        if not movedTitle or not originalTitle:
            print('*SKIPPING INCOMPLETE PAGE MOVE ENTRY: [[' + movedTitle + ']] -> [[' + originalTitle + ']]')
            continue

        p = wikipedia.Page(site, movedTitle.encode(site.encoding()))
        # Move it back; third argument: movetalkpage = True.  The sysop
        # argument is left at its default.
        p.move(originalTitle, 'bot-moving (see [[botwiki:python:revertAll.py]]), reversing the contribution of [[user:' + vandalName + ']]', True)
        print('bot-moving (see [[botwiki:python:revertAll.py]]) from [[' + movedTitle + ']] to [[' + originalTitle + ']], reversing the contribution of [[user:' + vandalName + ']]')

        # What is left under the vandal's title after moving back is rubbish:
        # delete it, or tag it for deletion when we have no sysop rights.
        crapRedirect = wikipedia.Page(site, movedTitle.encode(site.encoding()))
        if IamSysop:
            # Fixed link: "[[user:" needs the colon to point at the user page.
            crapRedirect.delete('bot-deletion (see [[botwiki:python:revertAll.py]])of relic of pagemove vandalism by [[user:' + vandalName + ']]')
            print('\n\n*DELETED THE REDIRECT [[' + movedTitle + ']]')
        else:
            # Third argument False: per the original note, "CONFIRMATION=False";
            # you can set it to True if you want.
            crapRedirect.put('{{delete|rubbish redirect, relic of vandalism by [[user:' + vandalName + ']]}}', 'bot-submit for deletion, relic of vandalism by [[user:' + vandalName + ']]', False)
            print('bot-submit for deletion, relic of vandalism by [[user:' + vandalName + ']]')
finally:
    # Always sign off from the framework, even if a move or delete failed.
    wikipedia.stopme()