Purple exclamation mark.svg Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit mediawiki, is welcome.

Red exclamation mark.svg UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before they can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.

Python:UserContributions.py

From Botwiki
Jump to: navigation, search
# PLAN:
# TO RETURN A LIST OF CONTRIBUTIONS OF A USER ON A WIKI,
#  FROM THE api, OR WHATEVER
# EACH ELEMENT IN THE LIST BEING A TUPLE OF
#(0. type='edit'/'pagemove', 1.pageid, 2.revid, 3.ns=namespace, 
#4.title=pagename, 5.timestamp , 6.minor=(boolean or string?), 7.comment=string)  [DO WE NEED ALL THAT?]
#http://en.wikipedia.org/w/api.php?action=query&list=usercontribs&ucuser=YurikBot
#GIVES
#      <item pageid="1887246" revid="75331142" ns="0" title="Louge Town Arc" timestamp="2006-09-12T17:55:46Z" minor="" comment="Robot: Fixing double redirect" />
# IN THE pyWikipedia FRAMEWORK
# THIS FUNCTION SHOULD BE IN wikipedia.py BUT I CANNOT FIND IT
 
## GENERAL SET-UP
# Wiki family and language code; the main script joins them as
# lang + '.' + family + '.org' to form the host name it queries.
family = "wikipedia"
lang = "en"
# Text whose presence in an edit comment marks the contribution as a page move.
# SEE http://en.wikiversity.org/w/index.php?title=Special%3ALog&type=move&user=&page=
moveText = " moved to "
 
### IMPORT
import wikipedia
import re
import urllib
 
### REGEX SET-UP
# The patterns work on the entity-escaped form of the API's <item ... /> elements,
# i.e. '&lt;' / '&gt;' around the element and '&quot;' around each attribute value.
# Each attribute pattern uses a lookbehind for  name=&quot;  and a lookahead for the
# closing  &quot;  so that the match itself is only the attribute value.
# Plain raw strings are used: the patterns are pure ASCII, so they behave the same
# on byte and unicode input and also compile on Python 3.
itemX       = re.compile(r'&lt;item .*?&gt;', flags=re.U)  # AN ITEM IN THE API
pageidX     = re.compile(r'(?<=pageid\=&quot;).*?(?=&quot;)', flags=re.U)
revidX      = re.compile(r'(?<=revid\=&quot;).*?(?=&quot;)', flags=re.U)
nsX         = re.compile(r'(?<=ns\=&quot;).*?(?=&quot;)', flags=re.U)
titleX      = re.compile(r'(?<=title\=&quot;).*?(?=&quot;)', flags=re.U)
timestampX  = re.compile(r'(?<=timestamp\=&quot;).*?(?=&quot;)', flags=re.U)
# The lookbehind must read  minor=&quot;  like the other attributes; the value is
# usually empty (minor=""), so a successful match is typically the empty string.
minorX      = re.compile(r'(?<=minor\=&quot;).*?(?=&quot;)', flags=re.U)
commentX    = re.compile(r'(?<=comment\=&quot;).*?(?=&quot;)', flags=re.U)
# SIGNATURE OF A PAGEMOVE: moveText, compiled so it can be searched for in an edit comment
moveX = re.compile(moveText, re.UNICODE)
 
### FUNCTION TO RETURN A LIST OF CONTRIBUTIONS OF user:userName after timestamplowerlimit
def _firstMatch(regex, text):
    """Return the first match of ``regex`` in ``text``, or '' when there is none."""
    found = regex.search(text)
    if found is None:
        return ''
    return found.group()


def userContributions(siteName, userName, timestamplowerlimit):
    """Yield the contributions of ``userName`` that are newer than a timestamp.

    The page ``http://<siteName>/w/api.php?action=query&list=usercontribs&ucuser=<userName>``
    is downloaded once and scanned with the module-level regexes
    (``itemX`` for each item, then one regex per attribute).

    Parameters:
        siteName -- host name placed right after ``http://``,
                    e.g. 'en.wikiversity.org'.
        userName -- user name, appended verbatim to the ``ucuser=`` parameter.
        timestamplowerlimit -- string; an item is reported only when its
                    timestamp string compares greater than this value
                    (ordinary string comparison, so a prefix such as
                    '2008-01-01' also works).

    Yields:
        One tuple per reported item:
        (type, pageid, revid, ns, title, timestamp, minor, comment).
        ``type`` is 'pagemove' when ``moveX`` matches the comment and 'edit'
        otherwise; every other field is the matched attribute text, or ''
        when the item does not carry that attribute.

    Each yielded tuple is also printed before it is yielded.
    """
    ## FETCH THE API PAGE; ALWAYS CLOSE THE CONNECTION, EVEN IF read() FAILS
    apiPage = urllib.urlopen('http://' + siteName + '/w/api.php?action=query&list=usercontribs&ucuser=' + userName)
    try:
        pageText = apiPage.read()
    finally:
        apiPage.close()

    ## EXTRACT THE INFORMATION, ONE ITEM AT A TIME
    for item in itemX.findall(pageText):
        # Every field is looked up afresh for each item, so a value can never
        # leak over from the previous item when an attribute is missing.
        pageid = _firstMatch(pageidX, item)
        revid = _firstMatch(revidX, item)
        ns = _firstMatch(nsX, item)
        title = _firstMatch(titleX, item)
        timestamp = _firstMatch(timestampX, item)
        minor = _firstMatch(minorX, item)
        comment = _firstMatch(commentX, item)

        # A page move is recognised by the move text inside the edit comment.
        if moveX.search(comment) is None:
            kind = 'edit'
        else:
            kind = 'pagemove'

        if timestamp > timestamplowerlimit:
            # NOTE(review): a caller that prints the yielded tuples as well
            # (as the MAIN section of this script does) shows each one twice.
            print(kind , pageid , revid , ns, title , timestamp , minor , comment) #ALL STRINGS
            yield (kind , pageid , revid , ns, title , timestamp , minor , comment) #ALL STRINGS
 
 
### MAIN
 
# Script entry point: ask for a user name and a lower timestamp limit, then
# print every contribution returned by userContributions().
# NOTE(review): the try/finally wraps the __name__ check, so wikipedia.stopme()
# also runs when this file is merely imported - confirm that this is intended.
try:
    if __name__ == '__main__':
        # SET-UP : MY COMMAND LINE DOES NOT SUPPORT UNICODE INPUT
        siteName = lang + '.' + family + '.org'
        # Defaults, used when the corresponding prompt is answered with an empty line.
        userName = 'hillgentleman1'
        timestamplowerlimit = '2008-01-01T12:34:56Z'
        x = raw_input('userName?')
        if x != '':
            userName = x
        a = raw_input('timestamplowerlimit?, e.g. "2008-01-01", or "2008-01-01T12:34:56Z" in full ')
        if a != '':
            timestamplowerlimit = a

        # Named 'contributions' rather than 'list' so the builtin list() is
        # not shadowed for the rest of the module.
        contributions = userContributions(siteName, userName, timestamplowerlimit)
        for p, q, r, s, t, u, v, w in contributions:
            print(p,q,r,s,t,u,v,w)  #SOME MAY NOT BE STRINGS - FIX THAT LATER
                                  #I DON'T LIKE wikipedia.output()
finally:
    wikipedia.stopme()
Personal tools
Share