Purple exclamation mark.svg Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit MediaWiki, is welcome.

Red exclamation mark.svg UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before you can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.

Python:Talkindex.py

From Botwiki
Jump to: navigation, search

Talk page index bot. It runs on the Chinese Wikipedia; see zh:User:Talkindexbot. Used for w:zh:Wikipedia:对话页讨论索引.

#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
talkindex.py v2.13 by [[zh:user:Shizhao]]
 
 
 
"""
#
# (C) Shizhao, 2008
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: talkindex.py,v 2.13 2008-03-14T21:59:00 Shizhao $'
#
import os
import sys
import urllib
import re, time, datetime
import wikipedia, config, cosmetic_changes
import xml.parsers.expat 
site = wikipedia.getSite()
 
def GetPage(rcstart,rcend, rcns):
    """Fetch the recent-changes XML body from the zh.wikipedia.org API.

    rcstart, rcend and rcns are substituted into the rcstart, rcend and
    rcnamespace query parameters of the request URL. The URL is logged
    through wikipedia.output and the raw response body is returned.
    """
    baseurl = 'http://zh.wikipedia.org/w/api.php?action=query&list=recentchanges&rcstart=%s&rcend=%s&rcnamespace=%s&rcprop=title|user|timestamp|comment&rclimit=500&format=xml'

    url = baseurl % (rcstart, rcend, rcns)
    response = wikipedia.MyURLopener().open(url)
    try:
        wikipedia.output(u'URL:\n%s' % url)
        return response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
 
# Shared state filled in by the expat handlers (listelement / start).
tempList = []   # rendered {{talkindex}} lines appended by start()
titleList = []  # talk-page titles already indexed by start()
talkList = []   # section names already indexed by start()
tlist = []      # "title#section" key of every section edit seen by listelement()
tulist = []     # the same key with the editing user appended
# Captures the section name from an edit summary of the form
# "/* Section name */ ...". The slash-star must be escaped: unescaped, "/*"
# means "zero or more slashes" and any text ending in " */" would match.
talkformat = r'''/\* (.*?) \*/'''
n = 0           # number of entries start() has appended to tempList
# One index line; start() fills the fourteen %s slots in this order.
template = '''* {{talkindex|title=%s|user=%s|talk=%s|time=%s|indiscussion=%s|hot=%s|fg=%s|nomain=%s|disambig=%s|redirect=%s|empty=%s|protect=%s|nowcommons=%s|talkprotect=%s}}\n'''
# Parse the XML data and collect a list of every section edit
def listelement(name, attrs):
    """Expat start-element handler that records section edits.

    For an <rc> element carrying title, user, comment and timestamp
    attributes whose comment matches talkformat, append "title#section" to
    tlist and "title#section" + user to tulist. Every other element is
    ignored. Always returns None.
    """
    if name != 'rc':
        return
    try:
        # All four attributes must be present, including the unused timestamp.
        page, editor, summary, _ = [
            attrs[key]
            for key in (u'''title''', u'''user''', u'''comment''', u'''timestamp''')
        ]
    except KeyError:
        return
    found = re.search(talkformat, summary)
    if found is None:
        # Not a section edit.
        return
    key = page + '#' + found.group(1)
    tlist.append(key)
    tulist.append(key + editor)
 
def remove_dups(lst):
    """Return the distinct elements of lst as a new list.

    The first occurrence of each element is kept and the original order is
    preserved, so the result is deterministic. Elements must be hashable:
    lists, dicts etc. are not supported as list elements.
    """
    seen = set()
    unique = []
    for item in lst:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique
 
# Get the number of users and the number of edits
def usersum(t):
    """Count the participants and edits recorded for one discussion.

    t is a "title#section" key as stored in tlist. Returns a tuple
    (nuser, ntalk): ntalk is how many entries of tlist equal t and nuser is
    how many distinct entries of tulist sit at those positions. A key that
    occurs at most once in tlist yields (0, 0).
    """
    if tlist.count(t) <= 1:
        return 0, 0
    # One pass over the paired lists; the key count is checked once up front.
    editors = [tu for key, tu in zip(tlist, tulist) if key == t]
    return len(set(editors)), len(editors)
 
# Flag hot discussions
def bighot(t, title):
    """Return 'yes' when the discussion keyed by t is hot, otherwise 'no'.

    A discussion is hot when usersum(t) reports more than one user and more
    than two edits; in that case a notice naming title is logged as well.
    """
    users, edits = usersum(t)
    if not (users > 1 and edits > 2):
        return 'no'
    wikipedia.output('"Hot!" In [[%s]] have %s users (%s edits) Talking......!' % (title, users, edits))
    return 'yes'
 
# Add the {{indiscussion}} template to a talk page
def IndiscussionAuto(t, title, talk, days, i):
    """Tag a busy section of a talk page with {{indiscussion|<month>}}.

    t is the "title#section" key, title the talk page, talk the section name,
    days the size of the observed window (used in the edit summary only) and
    i the result of Indiscussion() (None when the section is not tagged yet).

    The page is edited only when usersum(t) reports more than one user and
    more than four edits, i is None, and a "== talk ==" heading is actually
    found; otherwise nothing is saved.
    """
    nedit=usersum(t)
    # Debug trace of the activity counts and the current tag state.
    wikipedia.output(u'%s %s' % (nedit, i))
    if not (nedit[0]>1 and nedit[1]>4 and i is None):
        return

    pg=wikipedia.Page(site,title)
    text = pg.get()

    # Normalise level-2 headings that are followed by CRLF to "== title ==".
    text = wikipedia.replaceExcept(text, r'\n== *(?P<title>[^=]+?) *== *\r\n', '\n== \\g<title> ==\r\n', ['comment', 'math', 'nowiki', 'pre'])

    tag = u'{{indiscussion|%s}}' % time.gmtime()[1]
    # The section name is arbitrary user text: escape it for the pattern and
    # keep it out of the replacement template. The heading may end in LF or
    # CRLF.
    heading = re.compile(u'== ' + re.escape(talk) + u' ==\r?\n')
    tagged = heading.sub(lambda found: found.group(0) + tag + u'\n', text)
    if tagged == text:
        # No matching heading: do not save a page under a summary that claims
        # the template was added.
        wikipedia.output(u'{{indiscussion}} not added: heading "%s" not found in [[%s]]' % (talk, title))
        return
    pg.put(tagged, u'Bot添加 {{indiscussion}} 模板,最近%s天有%s位用户正在讨论“[[%s|%s]]”话题,已经编辑%s次' %(days,nedit[0], t,talk, nedit[1] ))
    wikipedia.output(u'flag {{indiscussion}}: %s' % talk)
# Update the {{CurrentDiscussion}} template
def CurrentDiscussion():
    """Write the number of pages transcluding {{indiscussion}} to Template:CurrentDiscussion.

    The count comes from getReferences(onlyTemplateInclusion=True) on
    Template:indiscussion; it is logged and saved as a non-minor edit.
    """
    tag_name = 'indiscussion'
    tag_page = wikipedia.Page(site, 'Template:%s' % tag_name)
    total = len(list(tag_page.getReferences(onlyTemplateInclusion=True)))

    counter_page = wikipedia.Page(site, 'Template:CurrentDiscussion')
    body = u"<font color=red>%s</font>项<noinclude>\n----\n参见[[:Category:進行中的討論|進行中的討論]]\n[[category:維基站務模板|C]]\n</noinclude>" % total
    summary = u"Bot更新: 当前有 %s 项专题讨论" % total
    wikipedia.output('[[Template:CurrentDiscussion]] update: Current %s Discussion' % total)
    counter_page.put(body, summary, minorEdit=False)
 
# Detect sections already marked as special-topic discussions
def Indiscussion(t, title,talk,days):
    """Return 'yes' if section talk of page title carries {{indiscussion}}.

    The template must follow the section name (optionally wrapped in "="
    heading markers and followed by whitespace or line breaks); matching is
    case-insensitive. Returns None when the tag is absent or the page text
    cannot be read. t and days are accepted for call compatibility and are
    not used.
    """
    s=wikipedia.Page(site,title)
    try:
        text=s.get()
    except Exception:
        # Best effort: an unreadable page counts as "not tagged", but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        wikipedia.output(u'Could not read [[%s]]; treating it as not tagged' % title)
        return None

    # talk is arbitrary user text, so it is escaped before entering the regex.
    pattern = (u'=* *' + re.escape(talk) + u' *=*'
               + r'\s*\{\{indiscussion\s*(?:\|.*?)?\}\}')
    if re.search(pattern, text, re.I) is None:
        return None
    wikipedia.output(u'"{{Indiscussion}}" fond in [[%s]]' % title)
    return 'yes'
 
 
# Flag featured and good articles (featured lists)
def FGflag(title):
    """Classify page title by the quality templates it transcludes.

    Returns 'FA' when the featured-article or featured-list template is
    present, 'GA' when only the GA template is present, 'ERROR' (with a log
    message) when a featured template and GA are both present, and ''
    otherwise.

    Templates are inspected because wikipedia.getCategoryLinks() did not work
    when this was written and only returned an empty category list.
    """
    pg=wikipedia.Page(site,title)
    templatelist=pg.templates()

    featured = u'特色条目' in templatelist or u'特色列表' in templatelist
    good = 'GA' in templatelist

    # The conflict is tested first so that it cannot be masked by the plain
    # 'GA' case.
    if featured and good:
        wikipedia.output(u'ERROR: Plese fix FA or GA!')
        return 'ERROR'
    if featured:
        return 'FA'
    if good:
        return 'GA'
    return ""
 
 
 
def nontalk(title):
    """Describe the subject page that belongs to talk page title.

    Returns a dict whose values are '' unless the condition holds:
      nomain      - 'none' when the subject page does not exist
      disambig    - 'yes' when it is a disambiguation page
      redirect    - 'yes' when it is a redirect
      empty       - 'yes' when it is empty and not a redirect (vandalism)
      protect     - 'yes' when it cannot be edited
      nowcommons  - 'yes' when it is an image whose file is on Commons
      talkprotect - 'yes' when the talk page itself cannot be edited
    The page-content checks are only made when the subject page exists.
    """
    pg=wikipedia.Page(site,title)
    nontalkpage=pg.toggleTalkPage()
    stuts={u'nomain':'',u'disambig':'',u'redirect':'',u'empty':'',u'protect':'',u'nowcommons':'',u'talkprotect':''}
    if nontalkpage.exists():
        if nontalkpage.isDisambig():
            stuts[u'disambig']='yes'
            wikipedia.output(u'[[%s]] is Disambig page!' % nontalkpage.title())
        is_redirect = nontalkpage.isRedirectPage()
        if is_redirect:
            stuts[u'redirect']='yes'
            wikipedia.output(u'[[%s]] is Redirect Page!' % nontalkpage.title())
        # The redirect test comes first so isEmpty() is never asked about a
        # redirect.
        if not is_redirect and nontalkpage.isEmpty():
            stuts[u'empty']='yes'
            wikipedia.output(u'WARING: [[%s]] is Empty!!!' % nontalkpage.title())
        if not nontalkpage.canBeEdited():
            stuts[u'protect']='yes'
            wikipedia.output(u'[[%s]] is protected!!!' % nontalkpage.title())
    else:
        stuts[u'nomain']='none'
        wikipedia.output(u'[[%s]] is Not exist!!!' % nontalkpage.title())

    if nontalkpage.isImage():
        imagepage=wikipedia.ImagePage(site, nontalkpage.title())
        if imagepage.fileIsOnCommons():
            stuts[u'nowcommons']='yes'
        # NOTE(review): a FeaturedPicture template check used to be computed
        # here, but its result was never returned or stored, so it is dropped.

    if not pg.canBeEdited():
        stuts[u'talkprotect']='yes'
        wikipedia.output(u'[[%s]] is Protected!!!' % title)

    return stuts
 
 
def start(name, attrs):
    """Expat start-element handler that builds the index lines.

    For each <rc> element with title, user, comment, timestamp and ns
    attributes whose comment matches talkformat, the (title, section) pair is
    indexed once: the page is annotated (hot flag, subject-page status,
    FA/GA flag for namespace '1', {{indiscussion}} state), a rendered
    template line is appended to tempList and the global counter n is
    incremented. Redirect talk pages and already indexed pairs are skipped.

    Only a missing attribute or a non-matching comment is treated as "skip
    this element"; errors raised by the annotation helpers propagate.
    """
    global n
    if name != 'rc':
        return
    try:
        title=attrs[u'''title''']
        user=attrs[u'''user''']
        comment=attrs[u'''comment''']
        timestamp=attrs[u'''timestamp''']
        ns=attrs[u'''ns''']
    except KeyError:
        return

    found=re.search(talkformat, comment)
    if found is None:
        # Not a section edit.
        return
    talk=found.group(1)
    t=title+'#'+talk

    # Exclude duplicate entries. titleList and talkList grow in lockstep, so
    # zipping them gives back the (title, section) pairs indexed so far;
    # comparing pairs keeps a new section whose title and name were each seen
    # before, but never together.
    if (title, talk) in zip(titleList, talkList):
        return
    if wikipedia.Page(site,title).isRedirectPage():
        wikipedia.output(u'%s is Redirect Page.' % title)
        return
    titleList.append(title)
    talkList.append(talk)

    # Extension part: annotate the entry
    hot=bighot(t, title)
    stuts=nontalk(title)
    if ns=='1':
        fg=FGflag(title)
    else:
        fg=''
    i=Indiscussion(t,title,talk,days)
    IndiscussionAuto(t, title, talk, days, i)

    tempList.append(template % (title, user, talk, timestamp, i, hot, fg, stuts[u'nomain'], stuts[u'disambig'], stuts[u'redirect'],stuts[u'empty'], stuts[u'protect'], stuts[u'nowcommons'], stuts[u'talkprotect']))
    n=n+1
 
# Feed the XML data to the parser
def Parsexml(html, start_element):
    """Parse html with expat, calling start_element(name, attrs) per start tag.

    html is the XML document text; start_element is installed as the
    parser's StartElementHandler.
    """
    p = xml.parsers.expat.ParserCreate()
    p.StartElementHandler = start_element
    # Not every parser has this attribute (it was removed in Python 3.3);
    # assigning it unconditionally raises AttributeError there.
    if hasattr(p, 'returns_unicode'):
        p.returns_unicode = True
    p.Parse(html)
 
def run():
    """Rebuild every talk-index page, then sleep four hours, forever.

    Each cycle fetches the recent changes of the last `days` days for every
    configured talk namespace, parses them twice (listelement gathers the
    activity counts, start renders the index lines), saves the matching index
    page and finally refreshes Template:CurrentDiscussion.
    """
    global n, tempList
    stamp = "%Y%m%d%H%M%S"
    # Per talk namespace: (rcnamespace, index page, description used in the
    # intro sentence, label used in the section heading).
    targets = (
        ('1', u'Wikipedia:对话页讨论索引/条目', u'条目', u'条目'),
        ('5', u'Wikipedia:对话页讨论索引/wikipedia', u'Wikipedia(项目、方针等页面)', u'Wikipedia'),
        ('7', u'Wikipedia:对话页讨论索引/图像', u'图像', u'图像'),
        ('9', u'Wikipedia:对话页讨论索引/mediawiki', u'Mediawiki(系统界面)', u'Mediawiki'),
        ('11', u'Wikipedia:对话页讨论索引/模板', u'模板', u'模板'),
        ('13', u'Wikipedia:对话页讨论索引/帮助', u'帮助', u'帮助'),
        ('15', u'Wikipedia:对话页讨论索引/分类', u'分类', u'分类'),
        ('101', u'Wikipedia:对话页讨论索引/主题', u'主题(Portal)', u'主题'),
    )
    header = u'''<noinclude>\n{{talkindex panel}}\n本页是中文维基百科%s对话页上活跃讨论的索引。由[[user:Talkindexbot|]]定期更新。\n</noinclude>\n\n== 最近%s天在%s对话页上的讨论 ==\n'''

    while True:
        # Format the dates. Both ends of the window come from one clock
        # reading so they cannot straddle a second/minute boundary.
        utc_now = datetime.datetime.utcnow()
        rcstart = utc_now.strftime(stamp)
        rcend = (utc_now - datetime.timedelta(days=days)).strftime(stamp)

        # Build a separate page for each namespace
        for rcns, wiki, scope, label in targets:
            # The handlers accumulate into module-level lists. Clear them so
            # that data from an earlier namespace or cycle neither inflates
            # the activity counts nor marks every entry as already indexed.
            for shared in (tlist, tulist, titleList, talkList):
                del shared[:]

            html = GetPage(rcstart,rcend, rcns)

            Parsexml(html, listelement)
            Parsexml(html, start)

            basewiki = header % (scope, days, label)

            wikipedia.output(u'Namespace: %s: Total %s talk in %s days' % (rcns, n, days))
            Lists = "".join(tempList)
            tempList=[]
            if not Lists:
                basewiki=basewiki+u'当前没有活跃的讨论。最后更新于~~~~~'
                comment=u"Bot更新讨论索引:最近%s天,Namespace %s 上没有活跃的讨论" % (days, rcns)
            else:
                basewiki=basewiki+(u'当前共有%s项讨论。最后更新于~~~~~\n' % n)+ Lists
                comment=u"Bot更新讨论索引:最近%s天内,Namespace %s 上共有%s项讨论" % (days, rcns, n)
            pg=wikipedia.Page(site,wiki)

            pg.put(basewiki, comment, minorEdit=False)
            n=0
        CurrentDiscussion()

        hours=4
        now = time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime())
        wikipedia.output(u'\nDone.')
        wikipedia.stopme()
        wikipedia.output(u'\nSleeping %s hours, now %s' % (hours, now))
        time.sleep(hours *60 *60)
# The discussion index covers the last `days` days; run(), start() and the
# edit summaries read this module-level value.
days=7
# Start the bot; wikipedia.stopme() is called on the way out even when run()
# raises.
try:
    run()
finally:
    wikipedia.stopme()

--Shizhao 12:12, 5 March 2008 (UTC)

Personal tools
Share