Python:RevertAll.py
From Botwiki
High-quality translations from experienced, native-speaking translators. Whether it is a one-page personal letter or a 200-page technical manual, we deliver the same superior quality. We entrust the translation of your documents to an expert native speaker with competence in your subject area. For example, if you hire us to translate a legal contract from English into French, the work will be done by a native French speaker with extensive expertise in translating legal documents.
# RevertAll.py - revert all edits of one user (page moves et al.) and either
# delete the pages only that user edited or submit them for deletion,
# depending on the answer to the "Are you a sysop?" prompt.
#
# STATUS: TO BE TESTED FOR BUGS.
# FORMERLY CALLED "THE PLAN OF [[Python:nukeAll.py]]".
#
# POSSIBLE GENERALISATIONS:
#   1. ALLOW FOR A GROUP OF VANDALS - SIMPLY TWEAK THE FINAL FOR-LOOP.
#   2. TRANS-WIKI CLEAN-UP? - NEEDS handleargs().
#   3. BETTER JUDGEMENT OF PAGEMOVE: USE THE FACT THAT A PAGEMOVE DOESN'T
#      CHANGE THE TEXT.
#   4. DELETE ALSO THE PAGES WHICH HAVE {{DELETE...}} STUFF.
#
# SUGGESTION: SET THE DEFAULT TIME LIMIT TO 2 HOURS, I.E. REVERT ALL EDITS
# FROM AN IP ADDRESS OR A USER WITHIN THE LAST 2 HOURS (OR PERHAPS ONE DAY?).
# OPTIONAL PARAMETER: REVERT ALL THOSE AFTER A CERTAIN TIMESTAMP OR REVISION ID.
#
# NOTE: WE ARE ASSUMING THAT THERE IS ONLY ONE VANDAL. MULTIPLE VANDALS WOULD
# CREATE EDIT CONFLICTS AMONGST THEMSELVES, WHICH ARE COSTLY TO THEM.
#
# The syntax below is kept valid for Python 2 (which the `wikipedia`
# framework module targets): every print uses a single parenthesised
# expression, which the Python 2 print statement accepts unchanged.

import re

### SET-UP: THE PARAMETERS, THE MESSAGES, ETC.
siteName = 'zh-yue.wikipedia.org'
# If your command line doesn't support Unicode, set the name by hand here
# and just press RETURN when the "Name of vandal" prompt appears.
vandalName = 'Hillgentleman1'
timestamp = ''      # Passed to userContributions; not yet used in this draft.
uclimit = 500       # Set it to 5000 for a robot, if you like; not yet used.
sandboxName = 'Wikipedia:Sandbox'
pagemoveWord = ' moved to '     # THE SPACES ARE IMPORTANT
IamSysop = False


### REGEX SET-UP
def _compileMovePatterns(word):
    """Build the (moved-from, moved-to) title patterns for a page-move word.

    The word comes from [[mediawiki:1movedto2]] and may need setting up per
    wiki.  It is escaped so that it is always matched literally, even if it
    contains regex metacharacters.
    """
    literal = re.escape(word)
    fromPattern = re.compile(r'(?<=\[\[).*?(?=\]\]' + literal + r')', flags=re.U)
    toPattern = re.compile(r'(?<=' + literal + r'\[\[).*?(?=\]\])', flags=re.U)
    return fromPattern, toPattern


# Compiled for the default word; main() recompiles them after the prompts.
movedfromX, movedtoX = _compileMovePatterns(pagemoveWord)


### DEFINE THE FUNCTIONS
def commentSaysMoved(comment=''):
    """Guess from an edit summary whether the edit was a page move.

    STUB UNTIL WE HAVE SOMETHING BETTER - SEE DISCUSSION ON [[META:META:BABEL]].

    Returns a tuple (saysSo, fromPage, toPage):
      saysSo   -- True if either pattern matched the comment.
      fromPage -- last title found in front of the page-move word, else ''.
      toPage   -- last title found after the page-move word, else ''.
    Either title may be '' even when saysSo is True, so callers that need
    both must check them.
    """
    sources = movedfromX.findall(comment)
    targets = movedtoX.findall(comment)
    fromPage = sources[-1] if sources else ''
    toPage = targets[-1] if targets else ''
    return bool(sources or targets), fromPage, toPage


def _ask(prompt, default):
    """Prompt on the console; an empty answer keeps the default."""
    answer = raw_input(prompt)
    if answer != '':
        return answer
    return default


def _revertEdits(site, contributions):
    """Revert (or delete / tag for deletion) every page the vandal touched.

    contributions is iterated as 8-tuples
    (type, pageid, revid, ns, title, timestamp, minor, comment); only the
    title and the comment are used.  Returns the list of detected page moves
    as (current title, original title) pairs, to be undone afterwards.
    """
    # Function-scope import: the framework is only needed when the bot runs.
    import wikipedia

    moveList = []
    for _kind, _pageid, _revid, _ns, title, _editTime, _minor, comment in contributions:
        # THE NEXT LINE IS TEMPORARILY COMMENTED OUT TO AVOID UNICODE PROBLEMS
        #if title == sandboxName : continue    # DO NOT TOUCH THE SANDBOX
        # More robust would be to use the page id of the sandbox.
        page = wikipedia.Page(site, title.encode(site.encoding()))

        # fullVersionHistory() yields (edit date/time, user name, content).
        # It returns ALL edits and is slow, but it is the only call known
        # here that also gives the wikitext.
        edits = page.fullVersionHistory()
        # Flip the historical order -- presumably newest first afterwards
        # (the author checked it with fullVersionHistoryTest.py).
        edits.reverse()
        print('*** TITLE: ' + title)

        if not edits:
            # Nothing to inspect; without this guard the names bound by the
            # loop below would be undefined.
            print('*NO HISTORY FOUND FOR [[' + title + ']], SKIPPED')
            continue

        for editTime, userName, content in edits:
            print('*** TIMESTAMP, USERNAME, CONTENT: ' + editTime + ' '
                  + userName + ' ' + content[0:50] + ' \n')
            if userName != vandalName:
                ### vandalName IS NOT THE ONLY EDITOR: RESTORE THIS REVISION
                page.put(content, 'reverting [[' + title + ']] to the edit at '
                         + editTime + u' by ' + userName)
                print('*reverting [[' + title + ']] to the edit at time='
                      + editTime + ' by ' + userName)
                break
        else:
            ### vandalName IS THE ONLY EDITOR OF THE PAGE
            # `content` still holds the last revision iterated above.
            if IamSysop:
                # Second argument is prompt=False; set it to True if you want.
                page.delete('bot-deletion (see [[botwiki:python:revertAll.py]])of spam or vandalism: the content starts with:' + content[0:20], False)
                # Two prints so a non-ASCII console-typed vandal name is
                # never concatenated with the unicode title.
                print('\n\n*DELETED THE PAGE  ' + title)
                print(' BY [[user: ' + vandalName + ' ]]')
            else:
                if page.isRedirectPage():
                    print('\n*Is a redirect!\n')
                    # TEMPORARY MEASURE: DON'T TOUCH IT OR ELSE THE USER
                    # CAN'T MOVE IT; ONLY A SYSOP CAN DELETE-AND-MOVE.
                    # (This `continue` advances the outer per-page loop.)
                    continue
                page.put('{{delete|vandalism}}', 'submit for deletion: the only contributor was ' + vandalName)
                print('\n\n*SUBMIT FOR DELETION: the only contributor was  ' + vandalName)
                # NOTE(review): the original indentation was lost; these
                # four prints are assumed to belong to this branch because
                # the sysop message above already names the page.
                print('\n* WHOSE CONTENT STARTS WITH:')
                print(content[0:200])
                print('\n* AND WHOSE TITLE IS:')
                print(title)
        print('--------------------------------------------------------------------------------------------\n')

        ### DETECT PAGEMOVE: THE SIMPLEST MECHANISM NOW; NEEDS MORE SOPHISTICATION.
        ### IF THERE IS A PAGEMOVE, FLAG IT; THE PAGES ARE MOVED AT THE END.
        moved, fromTitle, toTitle = commentSaysMoved(comment)
        if moved and fromTitle and toTitle:
            # Queue the move back from "moved to" to "moved from"; only
            # when BOTH titles were found, otherwise a page with an empty
            # title would be moved later.
            moveList.append((toTitle, fromTitle))
            print('\n*PAGE MOVE DETECTED: FROM [[' + fromTitle + ']] TO [[' + toTitle + ']]')
        elif moved:
            print('\n*PAGE MOVE SUSPECTED BUT ONLY ONE TITLE FOUND IN THE COMMENT; NOT QUEUED')
    return moveList


def _undoMoves(site, moveList):
    """Move every queued page back and dispose of the leftover redirect.

    IF THERE WAS A SEQUENCE OF PAGE MOVES, THEY ARE MOVED BACK IN SEQUENCE.
    POTENTIAL IMPROVEMENT: MOVE-AND-DELETE, NOT "SIMPLY MOVE".
    """
    import wikipedia

    for current, original in moveList:
        movedPage = wikipedia.Page(site, current.encode(site.encoding()))
        # Third argument is movetalkpage=True; sysop is left at its default.
        movedPage.move(original, 'bot-moving (see [[botwiki:python:revertAll.py]]), reversing the contribution of [[user:' + vandalName + ']]', True)
        print('bot-moving (see [[botwiki:python:revertAll.py]]) from [[' + current + ']] to [[' + original + ']], reversing the contribution of [[user:' + vandalName + ']]')

        crapRedirect = wikipedia.Page(site, current.encode(site.encoding()))
        if IamSysop:
            crapRedirect.delete('bot-deletion (see [[botwiki:python:revertAll.py]])of relic of pagemove vandalism by [[user:' + vandalName + ']]')
            print('\n\n*DELETED THE REDIRECT [[' + current + ']]')
        else:
            # Third argument False = no confirmation; set to True if you want.
            crapRedirect.put('{{delete|rubbish redirect, relic of vandalism by [[user:' + vandalName + ']]}}', 'bot-submit for deletion, relic of vandalism by [[user:' + vandalName + ']]', False)
            print('bot-submit for deletion, relic of vandalism by [[user:' + vandalName + ']]')


def main():
    """Prompt for the settings, then revert the vandal's contributions."""
    global IamSysop, vandalName, siteName, sandboxName, pagemoveWord
    global movedfromX, movedtoX

    # Only an answer starting with "y" enables sysop mode; previously any
    # non-empty answer (even "n") did.
    answer = raw_input('Are you a sysop? - (y)es, default = False')
    IamSysop = answer.strip().lower().startswith('y')
    vandalName = _ask('Name of vandal, UPPER CASE FIRST? (' + vandalName + ')', vandalName)
    ##timestamp = _ask('timestamp?', timestamp)   # ENABLE WHEN timestamp IS USED
    siteName = _ask('siteName?(' + siteName + ')', siteName)
    sandboxName = _ask('sandboxName, UPPER CASE FIRST?(' + sandboxName + ')', sandboxName)
    pagemoveWord = _ask('page move word?("' + pagemoveWord + '")', pagemoveWord)
    movedfromX, movedtoX = _compileMovePatterns(pagemoveWord)

    ### IMPORTING THE MODULES; CREATING THE SITE OBJECT
    import wikipedia
    import userContributions

    try:
        site = wikipedia.getSite()
        print(site.encoding())  # TESTING
        # userContributions.py IS NOT USING THE VARIABLE timestamp AT PRESENT
        contributions = userContributions.userContributions(siteName, vandalName, timestamp)

        ### STEP 1: REVERT ALL EDITS - SOMEWHAT TESTED
        ### STEP 2 (DONE TOGETHER WITH STEP 1): DELETE THE CRAP - KIND OF TESTED
        moveList = _revertEdits(site, contributions)
        ### STEP 3: REVERT ALL PAGEMOVES - SOMEWHAT TESTED
        _undoMoves(site, moveList)
    finally:
        # Always release the framework, even when a step above fails.
        wikipedia.stopme()


if __name__ == '__main__':
    main()
BlogMarks
del.icio.us
digg
Fark
Furl
Newsvine
reddit
Segnalo
Simpy
Slashdot
smarking
Spurl
Wists
