Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit MediaWiki, is welcome.
UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before you can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.
Manual:Replacer.py
# -*- coding: utf-8 -*- ''' Critical issues: * max. 10 threads to not make too many edits and keep database connections below limit * www.mediawiki.org must be supported FIMEs: *Right after getting the CheckUsage results, it start retrieving [[User:CommonsDelinker/replace-I18n]]. It does this multiple times for each site. That's quite inefficient. ''' import wikipedia, config, codecs import urllib2, re, time, thread import MySQLdb months=['', "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"] summaries={'default':u'[[w:commons:User:Orgullobot/commands|Bot]]: Replacing $1 with $2. [[m:User:CommonsDelinker|Translate me]] [[User:CommonsDelinker/replace-I18n|here]]!'} done=[] editing=[]#a list of pages the bot is currently editing/checking, to avoid edit conflicts with the threads #Note: This is NOT a good way to do this. existentes=[] #a list of pages that we have checked if they exist, as to not check them over again. def pageText(url): request=urllib2.Request(url) user_agent='Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7' #print url request.add_header("User-Agent", user_agent) response=urllib2.urlopen(request) text=response.read() response.close() return text def checanombres(page): #checanombres('es', 'wikipedia') dicc={} crudo=pageText('http://'+page.site().hostname()+'/w/api.php?action=query&prop=revisions&titles='+page.urlname()+'&rvprop=content&format=xml') carne=crudo.split('<page ')[1].split('>')[0] ns=carne.split('ns="')[1].split('"')[0] return int(ns) pagelink=wikipedia.Page(wikipedia.Site('es', 'wikipedia'), 'Wikipedia:Putos') wikipedia.output( u'ChecaNombres: '+str(checanombres(pagelink)) ) records=[] def record(hora, page, img, new_image_name="NULL", status="ok"): records.append((hora, page, img, new_image_name, status)) if len(records)>50: print 'Recording...' 
conn = MySQLdb.connect(host="sql",user="orgullo", passwd="****",db="u_orgullo_logs", charset='utf8', use_unicode=1) cursor=conn.cursor() cursor.execute('set names utf8;') for archivo in records: hora=archivo[0] page=archivo[1] img=archivo[2] new_image_name=archivo[3] status=archivo[4] rightnow=str(time.time()) canIDB(rightnow) wikiT=str(page.site()).split(':') if wikiT[0]=='commons': wikiT=['wikimedia', 'commons'] wiki=wikiT[1]+'.'+wikiT[0]+'.org' fortit=page.titleWithoutNamespace().replace(' ', '_') img=img.replace(' ', '_') img=conn.escape_string(img.encode('utf-8')).decode('utf-8') new_image_name=conn.escape_string(new_image_name.encode('utf-8')).decode('utf-8') fortit=conn.escape_string(fortit.encode('utf-8')).decode('utf-8') query=r"insert into delinker VALUES('"+hora+"','"+img+"', '"+wiki+"', '"+fortit+"', '"+str(checanombres(page))+"', '"+status+"', '"+new_image_name+"');" cursor.execute(query) conn.commit() conn.close() dbt=open('dbthrottle.txt', 'r') dbtt=dbt.read() dbt.close() newdbtt=dbtt.replace(rightnow+'\n', '') dbtW=open('./dbthrottle.txt', 'w') dbtW.write(newdbtt) dbtW.close() while records != []: records.remove(records[0]) #record(time.time(), wikipedia.Page(wikipedia.getSite(), 'This is a test'), 'Testimage.jpg') #'%Y-%m-%d %H:%M #'2006-09-22 21:01' def exists(page): """This is much more efficient for the servers""" #http://es.wikipedia.org/w/query.php?what=content&titles=Image:Punta%20del%20Este.jpg&aplimit=1&format=xml if page in existentes: return True path='http://'+page.site().hostname()+'/w/query.php?what=imageinfo&titles='+page.urlname()+'&aplimit=1&format=xml' crudo=pageText(path) identi=crudo.split('<id>')[1].split('</id>')[0] espacio=crudo.split('<ns>')[1].split('</ns>')[0] if identi != "0": if espacio=="6": if not '<image ' in crudo: return False existentes.append(page) return True else: return False def getcommands(): uni=[] restored=[] lo=codecs.open('commons-commands.txt', 'r', 'utf-8') lotxt=lo.read() lo.close() return lotxt def 
canIedit(): if '{{stop}}' in getcommands().lower(): return False else: return True def canIDB(rightnow): try: canIgo=False while canIgo==False: dbthrottleCheck=open('dbthrottle.txt', 'r') dbthrottleCheckTxt=dbthrottleCheck.read() dbthrottleCheck.close() if dbthrottleCheckTxt.count('\n')<10: dbthrottleA=open('dbthrottle.txt', 'a') dbthrottleA.write(rightnow+'\n') return True time.sleep(10) except IOError: time.sleep(10) def summary(wiki_site): try: if wiki_site in summaries: if time.time()-summaries[wiki_site][1]<3600:##reload the summary if it's over an hour old return summaries[wiki_site][0] pl=wikipedia.Page(wiki_site, u'User:CommonsDelinker/replace-I18n') try: x=pl.get() summaries[wiki_site]=[x, time.time()] return x except wikipedia.NoPage: if not 'wikipedia' in str(wiki_site): lang=str(wiki_site).split(':')[-1] if lang in ('incubator', 'meta', 'commons', 'species'): new_site=wikipedia.Site('en', 'wikipedia') else: new_site=wikipedia.Site(lang, 'wikipedia') return summary(new_site) summaries[wiki_site]=[summaries['default'], time.time()] return summaries['default'] except: return summaries['default'] def replace_image(img, pg, newimg): tocon='a'*14 if canIedit()==False: return None print ('Replacing image', img, pg, pg.site(), newimg) fix=wikipedia.Page(pg.site(), img) img=fix.titleWithoutNamespace() ext1=img.split('.')[-1] ext2=newimg.split('.')[-1] print 'Extensions: '+ext1+', '+ext2 if ext2.lower()=='svg': if ext1.lower() !='svg': print 'Ignoring non-SVG to SVG replacement.' 
return None newimg=wikipedia.Page(pg.site(), newimg).titleWithoutNamespace() while pg in editing: time.sleep(3) editing.append(pg) msg=summary(pg.site()) msg=msg.replace('$1', img) msg=msg.replace('$2', newimg) imagen=pg.site().namespace(6) wikipedia.output(pg.title()) if pg.namespace() not in [99999999]: txt=pg.get() newTxt=txt if pg.site() != wikipedia.Site('commons', 'commons'): ce=wikipedia.Page(pg.site(), 'Image:'+img) if exists(ce): print 'Pulling out' return None forpat=img toescape=('.', '(', ')') for te in toescape: forpat=forpat.replace(te, '\\'+te) rx=r'['+img[0].upper()+forpat[0].lower()+']'+forpat[1:] if ' ' in rx: rx=rx.replace(' ', '[ _]') elif '_' in rx: rx=rx.replace('_', '[ _]') print [rx] posis=re.findall(rx, newTxt) print posis for posi in posis: newTxt=newTxt.replace(posi, newimg) if txt != newTxt: try: ##We want to make sure the userpage is not empty filename='canedit.cdl' f=codecs.open(filename, 'r', 'utf-8') ftxt=f.read() f.close() if not '#'+str(pg.site()) in ftxt: userpage=wikipedia.Page(pg.site(), 'User:CommonsDelinker') if not exists(userpage): userpage.put('#Redirect[[m:User:CommonsDelinker]]', '') f=codecs.open(filename, 'a', 'utf-8') f.write('#'+str(pg.site())) f.close() wikipedia.showDiff(txt, newTxt) pg.put(newTxt, msg) thread.start_new_thread(record, (tocon, pg, img, newimg, "ok")) except wikipedia.LockedPage: thread.start_new_thread(record, (tocon, pg, img, newimg, "failed")) print 'Page is locked' else: #thread.start_new_thread(record, (tocon, pg, img, newimg, "skipped")) wikipedia.output( u'No match: '+pg.site().hostname()+'/wiki/'+pg.urlname() ) while pg in editing: editing.remove(pg) def checkUsage(image, newimg): print ('check usage', image, newimg) imageU=wikipedia.Page(wikipedia.getSite(), image).urlname() path='http://tools.wikimedia.de/%7Edaniel/WikiSense/CheckUsage.php?i='+imageU+'&w=_100000#end' ch=pageText(path).decode('utf-8') projs=ch.split("class='project'")[1:] print (str(len(projs))+ u' projects for', image) for 
proj in projs: baseR=ur'wik(?:i[mp]edia|ibooks|tionary|iquote|inews|isource|iversity)' proid=re.findall('http://([^\.]*\.'+baseR+'\.org)', proj)[0] #couldbe=('<i><b>different image', '<i>found:', '<i>local duplicate') #found=0 #for could in couldbe: # if could in proj: # found=1 #if found==1: # #going on, this project has a local copy # continue tabla=proj.split("<div class='page'>")[1:] wikipedia.output(u'Checkusage returns '+str(len(tabla))+' for '+image+' in '+proid+'.') for ta in tabla: ta=ta.split('</div>')[0] url=ta.split('<a href="')[1].split('?uselang=en"')[0] wpR=ur'http://(commons|incubator|meta|species|www|[^\.]*)\.('+baseR+')\.org/wiki/(.*)' wikipedia.output( wpR ) wp=re.findall(wpR, url) print wp if wp != []: if wp[0][0]==u'commons': wp=['commons', 'commons', wp[0][2]] elif wp[0][0]==u'meta': wp=['meta', 'meta', wp[0][2]] elif wp[0][0]==u'incubator': wp=['incubator', 'incubator', wp[0][2]] elif wp[0][0]==u'www': wp=['mediawiki', 'mediawiki', wp[0][2]] elif wp[0][0]==u'species': wp=['species', 'species', wp[0][2]] else: wp=wp[0] pagelink=wikipedia.Page(wikipedia.Site(wp[0], wp[1]), wp[2]) try: user=config.usernames[wp[1]][wp[0]] #retirar_imagen(image, pagelink, admin) while pagelink in editing: time.sleep(3) thread.start_new_thread(replace_image, (image, pagelink, newimg)) except KeyError: continue hechas=[] fiables=['User:Orgullobot/commands'] def RUN(): for fiable in fiables: cmd=wikipedia.Page(wikipedia.Site('commons', 'commons'), fiable) texto=cmd.get() lfile=codecs.open('./commons-commands.txt', 'w', 'utf-8') lfile.write(texto) lfile.close() texto=getcommands() chuletas=texto.split('{{') chuletas.remove(chuletas[0]) for chuleta in chuletas: if chuleta in hechas: chuletas.remove(chuleta) continue else: hechas.append(chuleta) com=chuleta.split('|')[0] if com.lower() in ['universal replace', 'universal_replace']: img=chuleta.split('|')[1] newimg=chuleta.split('|')[2].split('}}')[0] thread.start_new_thread(checkUsage, (img, newimg)) time.sleep(5) 
#checkUsage(img, newimg) while 2==2: RUN() time.sleep(60)