# Python:UserContributions.py/aoc
#
# From Botwiki
#
# (Wiki page navigation text: "Jump to: navigation, search")
#Adapted from userContributions.py:
 
# PLAN:
# TO RETURN A LIST OF CONTRIBUTIONS OF A USER ON A WIKI,
#  FROM THE api, OR WHATEVER
# EACH ELEMENT IN THE LIST BEING A TUPLE OF
#(0. type='edit'/'pagemove', 1.pageid, 2.revid, 3.ns=namespace, 
#4.title=pagename, 5.timestamp , 6.minor=(boolean or string?), 7.comment=string)  [DO WE NEED ALL THAT?]
#http://en.wikipedia.org/w/api.php?action=query&list=usercontribs&ucuser=YurikBot
#GIVES
#      <item pageid="1887246" revid="75331142" ns="0" title="Louge Town Arc" timestamp="2006-09-12T17:55:46Z" minor="" comment="Robot: Fixing double redirect" />
# IN THE pyWikipedia FRAMEWORK
# THIS FUNCTION SHOULD BE IN wikipedia.py BUT I CANNOT FIND IT
 
## GENERAL SET-UP
#family = 'wikipedia'
#lang = 'en'
# TEXT THAT MARKS A PAGE MOVE INSIDE A CONTRIBUTION'S COMMENT; IT IS COMPILED
# INTO THE REGEX moveX.  FOR THE WORDING SEE THE MOVE LOG:
# http://en.wikiversity.org/w/index.php?title=Special%3ALog&type=move&user=&page=
moveText = ' moved to '
 
### IMPORT
import wikipedia
import re
import urllib
 
### REGEX SET-UP
itemX       = re.compile(ur'&lt;item .*?&gt;', flags=re.U)  # AN ITEM IN THE API
pageidX     = re.compile(ur'(?<=pageid\=&quot;).*?(?=&quot;)', flags=re.U)
revidX      = re.compile(ur'(?<=revid\=&quot;).*?(?=&quot;)', flags=re.U)
nsX         = re.compile(ur'(?<=ns\=&quot;).*?(?=&quot;)', flags=re.U)
titleX      = re.compile(ur'(?<=title\=&quot;).*?(?=&quot;)', flags=re.U)
timestampX  = re.compile(ur'(?<=timestamp\=&quot;).*?(?=&quot;)', flags=re.U)
minorX      = re.compile(ur'(?<=minor\&quot;").*?(?=&quot;)', flags=re.U)
commentX    = re.compile(ur'(?<=comment\=&quot;).*?(?=&quot;)', flags=re.U)
# SIGNATURE OF A PAGEMOVE: moveText OCCURRING ANYWHERE IN THE COMMENT
moveX = re.compile(moveText, re.U)
 
### HELPER: FIRST MATCH OF A COMPILED REGEX, OR A DEFAULT
def _firstMatch(regex, text, default=''):
  """Return the text of the first match of regex in text, or default if none."""
  found = regex.search(text)
  if found is None:
    return default
  return found.group(0)


### GENERATOR YIELDING THE CONTRIBUTIONS OF user:userName after timestamplowerlimit
def userContributions(siteName,userName,timestamplowerlimit): #siteName E.G. 'aoc.wikia.com'
  """Yield the contributions of userName on siteName newer than timestamplowerlimit.

  Fetches
    http://<siteName>/api.php?action=query&list=usercontribs&ucuser=<userName>
  once and scans the response with the module-level regexes (itemX, pageidX,
  revidX, nsX, titleX, timestampX, minorX, commentX, moveX).

  Parameters:
    siteName            -- whatever belongs between 'http://' and '/api.php',
                           e.g. 'aoc.wikia.com'.
    userName            -- user whose contributions are wanted; it is
                           percent-quoted before being put into the URL
                           (unicode is encoded as UTF-8 first).
    timestamplowerlimit -- string; only items whose timestamp string compares
                           greater than this are yielded.  The comparison is a
                           plain string comparison.

  Yields one tuple per accepted item:
    (type, pageid, revid, ns, title, timestamp, minor, comment)
    type   -- 'pagemove' if the comment contains moveText, otherwise 'edit'.
    title  -- unicode, decoded from UTF-8; the other attribute values are
              returned as found in the response.
    minor  -- the value of the item's minor attribute, or None when the item
              has no such attribute (presumably the API only emits it for
              minor edits - confirm).
    Any other attribute missing from an item is reported as ''.  Every item
    is parsed on its own, so nothing is carried over from the previous item.
    An item without a timestamp is never yielded.

  NOTE(review): the URL carries no limit or continuation parameter, so only
  the items returned by that single request are seen.
  This is Python 2 code (unicode, urllib.quote, urllib.urlopen).
  """
  ## BUILD THE URL; QUOTE THE USER NAME SO THAT SPACES, '&', '#' OR NON-ASCII
  ## CHARACTERS CANNOT BREAK OR ALTER THE QUERY STRING
  if isinstance(userName, unicode):
    userName = userName.encode('utf-8')  # urllib.quote CANNOT TAKE NON-ASCII unicode
  url = ('http://' + siteName + '/api.php?action=query&list=usercontribs&ucuser='
         + urllib.quote(userName, safe=''))
  ## READ THE PAGE, RELEASING THE CONNECTION EVEN IF THE READ FAILS
  apiPage = urllib.urlopen(url)
  try:
    text = apiPage.read()
  finally:
    apiPage.close()
  ## EXTRACT THE INFORMATION, ONE ITEM AT A TIME
  for item in itemX.findall(text):
    timestamp = _firstMatch(timestampX, item)
    if not timestamp > timestamplowerlimit:
      continue  # TOO OLD, OR NO TIMESTAMP AT ALL
    comment = _firstMatch(commentX, item)
    if moveX.search(comment):
      kind = 'pagemove'
    else:
      kind = 'edit'
    yield (kind,
           _firstMatch(pageidX, item),
           _firstMatch(revidX, item),
           _firstMatch(nsX, item),
           unicode(_firstMatch(titleX, item), 'utf-8'),
           timestamp,
           _firstMatch(minorX, item, None),
           comment)
 
 
### MAIN
# ASKS FOR A USER NAME AND A LOWER TIMESTAMP LIMIT (AN EMPTY ANSWER KEEPS THE
# DEFAULT), THEN PRINTS EVERY CONTRIBUTION TUPLE.  wikipedia.stopme() RUNS IN
# THE finally CLAUSE, SO IT IS CALLED WHETHER OR NOT THE SCRIPT PART SUCCEEDS.

try:
    if __name__ == '__main__':
      # THE SITE IS HARD-CODED; THE AUTHOR'S COMMAND LINE DOES NOT SUPPORT
      # UNICODE INPUT
      siteName = 'aoc.wikia.com'

      typedName = raw_input('userName?')
      userName = typedName if typedName != '' else 'hillgentleman'

      typedLimit = raw_input('timestamplowerlimit?, e.g. "2008-01-01", or "2008-01-01T12:34:56Z" in full ')
      timestamplowerlimit = typedLimit if typedLimit != '' else '2008-01-01T12:34:56Z'

      contributions = userContributions(siteName, userName, timestamplowerlimit)
      # PLAIN print IS USED DELIBERATELY INSTEAD OF wikipedia.output();
      # NOT EVERY FIELD IS GUARANTEED TO BE A STRING YET
      for kind, pageid, revid, ns, title, timestamp, minor, comment in contributions:
        print(kind, pageid, revid, ns, title, timestamp, minor, comment)
finally:
    wikipedia.stopme()
# (Wiki page footer text: "Personal tools")
# (Wiki page footer text: "In other languages")