[wikia-pywikibot] / GE-ExportImport-Bot.py Repository:

Annotation of /GE-ExportImport-Bot.py

Parent Directory Parent Directory Revision Log Revision Log


Revision 3 - View Download

1 : dantman 3 #!/usr/bin/env python
2 :     # -*- coding: utf-8 -*-
3 :     """
4 :     This bot is used to Export pages from Wikipedia, alter them, then Import them to another wiki.
5 :     Info: http://en.anime.wikia.com/wiki/Project:Bots/ExportImport
6 :     """
7 :    
8 :     import sys, re
9 :     import wikipedia, pagegenerators, catlib, config
10 :     from time import *
11 :     import xml
12 :     import xml.dom.minidom as minidom
13 :     from xml.dom.minidom import Node
14 :    
class GEExport:
    """Export the revision history of pages from the English Wikipedia.

    Every page produced by the page generator is requested from the wiki's
    export address in successive batches, and each batch is written as an
    XML file under ``wpdumps/``.
    """

    def __init__(self, pageGenerator):
        """Store the iterable of pages that run() will walk through."""
        self.pageGenerator = pageGenerator

    def _fetchExport(self, wp, address, predata):
        """POST the export form and return the response body as a byte string.

        wp      -- site object the request is sent to
        address -- path returned by wp.export_address()
        predata -- dict of form fields; it is not modified here
        """
        if wp.hostname() in config.authenticate.keys():
            # urllib2 is not imported at module level, so bring it into
            # scope here; it is only needed on this branch.
            import urllib2
            # These are HTTP headers, not form fields: keeping them out of
            # predata stops them from being posted as export parameters.
            headers = {
                "Content-type": "application/x-www-form-urlencoded",
                "User-agent": wikipedia.useragent,
            }
            url = wp.protocol() + '://' + wp.hostname() + address
            request = urllib2.Request(url, wp.urlEncode(predata), headers)
            response = urllib2.urlopen(request)
            try:
                return response.read()
            finally:
                response.close()
        response, data = wp.postForm(address, predata)
        return data.encode(wp.encoding())

    def _nodeText(self, node):
        """Return the concatenated data of the direct text children of node."""
        return ''.join([child.data for child in node.childNodes
                        if child.nodeType == Node.TEXT_NODE])

    def exportPage(self, page):
        """Export all revisions of page, one dump file per batch.

        The export form is posted repeatedly. After each batch the timestamp
        of the last revision received becomes the next 'offset'. The loop
        ends when a response contains no <revision> elements, or when the
        offset stops advancing.
        """
        wp = wikipedia.getSite(code=u'en', fam=u'wikipedia')
        address = wp.export_address()
        title = page.sectionFreeTitle().encode(wp.encoding())
        predata = {
            'action': 'submit',
            'pages': title,
            'offset': '1',
        }
        while True:
            wikipedia.get_throttle()
            wikipedia.output('\03{lightpurple}>>\03{default} \03{lightaqua}Exporting revisions.\03{default}')
            # Now make the actual request to the server, timing it so the
            # throttle can be told how long the request took.
            now = time()
            data = self._fetchExport(wp, address, predata)
            wikipedia.get_throttle.setDelay(time() - now)

            doc = minidom.parseString(data)
            revs = doc.getElementsByTagName('revision')
            revCount = len(revs)
            if revCount == 0:
                wikipedia.output('\03{lightpurple}>>\03{default} \03{lightaqua}Returned no revisions, exporting for this page is complete.\03{default}')
                break

            lastRev = revs[-1].getElementsByTagName('timestamp')[0]
            timestamp = self._nodeText(lastRev)
            wikipedia.output('\03{lightpurple}>>\03{default} \03{lightaqua}Got %s revisions up to %s.\03{default}' % (revCount, timestamp))
            # '/' and ':' are replaced so the title and offset are usable
            # as a single file name.
            fileName = 'wpdumps/%s-%s.xml' % (title.replace('/', '-'), predata['offset'].replace(':', '-'))
            wikipedia.output('\03{lightpurple}>>\03{default} \03{lightblue}Saving to %s.\03{default}' % fileName)
            f = open(fileName, 'w')
            try:
                f.write(data)
            finally:
                # Close the dump even if the write fails.
                f.close()

            if timestamp == predata['offset']:
                # The server returned nothing newer than what was asked for;
                # requesting the same offset again would loop forever.
                wikipedia.output('\03{lightpurple}>>\03{default} \03{lightred}Offset did not advance past %s, stopping export of this page.\03{default}' % timestamp)
                break
            predata['offset'] = timestamp

    def run(self):
        """Export every page produced by the page generator, in order."""
        wikipedia.output(u'\03{lightblue}Running Export bot.\03{default}')
        for page in self.pageGenerator:
            wikipedia.output('\03{lightpurple}>\03{default} \03{lightaqua}Doing \03{lightpurple}%s\03{default}' % page.aslink())
            self.exportPage(page)
73 :    
class GEImport:
    """Import side of the bot; at present it only announces that it runs."""

    def run(self):
        """Print the start-up banner; no import work is performed here."""
        banner = u'\03{lightblue}Running Import bot.\03{default}'
        wikipedia.output(banner)
77 :    
def main():
    """Read the command line, build the requested bot and run it.

    The first argument returned by wikipedia.handleArgs() is taken as the
    action ('export' or 'import'). Every later argument is offered to the
    page generator factory, and the last generator it yields is used.
    An error is printed and nothing is run when the action is unknown or
    when 'export' is requested without a page generator.
    """
    action = None
    gen = None

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if action is None:
            action = arg
            continue
        generator = genFactory.handleArg(arg)
        if generator:
            gen = generator

    if action == 'export':
        if gen is None:
            wikipedia.output(u'\03{lightred}Export bot needs a page generator to itterate over.\03{default}')
            return
        bot = GEExport(gen)
    elif action == 'import':
        bot = GEImport()
    else:
        wikipedia.output(u'\03{lightred}Invalid bot action to run.\03{default}')
        return
    bot.run()
107 :    
if __name__ == "__main__":
    # Script entry point: wikipedia.stopme() is called whether main()
    # returns normally or raises.
    try:
        main()
    finally:
        wikipedia.stopme()

svn@nadir-point.com
Subversion  TortoiseSVN  ViewVC