]> git.openstreetmap.org Git - osqa.git/blob - forum_modules/sximporter/importer.py
7b5450656e91d03cedfab6163c47dfa30f052294
[osqa.git] / forum_modules / sximporter / importer.py
1 # -*- coding: utf-8 -*-\r
2 \r
3 from xml.dom import minidom\r
4 from datetime import datetime\r
5 import time\r
6 import re\r
7 from django.utils.translation import ugettext as _\r
8 from django.template.defaultfilters import slugify\r
9 from orm import orm\r
10 \r
def getText(el):
    """Return the text held directly inside DOM node ``el``.

    Only immediate children whose node type is TEXT_NODE contribute; text
    nested inside child elements is not included. The concatenated text is
    returned with leading and trailing whitespace stripped.
    """
    pieces = [child.data for child in el.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces).strip()
17 \r
# Captures everything before a trailing ".<digits>" fractional-seconds part.
msstrip = re.compile(r'^(.*)\.\d+')
def readTime(ts):
    """Parse a ``YYYY-MM-DDTHH:MM:SS[.fraction]`` string into a datetime.

    Any fractional-seconds suffix is discarded before parsing, so the
    returned datetime always has whole-second precision.
    """
    fraction = msstrip.match(ts)
    if fraction is not None:
        ts = fraction.group(1)

    parsed = time.strptime(ts, '%Y-%m-%dT%H:%M:%S')
    year, month, day, hour, minute, second = parsed[0:6]
    return datetime(year, month, day, hour, minute, second)
25 \r
def readEl(el):
    """Convert a DOM element into a dict of its child elements.

    Keys are the child tag names in lower case and values are the text
    returned by getText() for each child. Non-element children are skipped;
    if a tag name repeats, the last occurrence wins.
    """
    fields = {}
    for child in el.childNodes:
        if child.nodeType == el.ELEMENT_NODE:
            fields[child.tagName.lower()] = getText(child)
    return fields
28 \r
def readTable(dump, name):
    """Load ``<name>.xml`` from ``dump`` and return its rows as dicts.

    ``dump`` must expose ``read(filename)`` returning the XML document; every
    ``row`` element found in it is converted with readEl().
    """
    xml_source = dump.read("%s.xml" % name)
    document = minidom.parseString(xml_source)
    return [readEl(row) for row in document.getElementsByTagName('row')]
31 \r
class UnknownUser(object):
    """Placeholder display name for a user whose real name is unavailable.

    Every instance takes the next value of a class-wide counter, so each
    placeholder renders with its own number.
    """
    # Number of placeholders created so far; shared by all instances.
    counter = 0

    def __init__(self):
        UnknownUser.counter += 1
        self.number = UnknownUser.counter

    def __str__(self):
        template = _("Unknown user %(number)d")
        return template % {'number': self.number}

    def __unicode__(self):
        # Same rendering as __str__.
        return self.__str__()

    def encode(self, *args):
        # Accepts and ignores any encoding arguments; returns the same
        # text as __str__.
        return self.__str__()
46 \r
class IdMapper(dict):
    """Dict from source ids to target ids, with both sides coerced to int.

    Subscript lookup of an id that was never stored does not raise: it
    yields 1 instead, and the missing key is not inserted.
    """

    def __getitem__(self, key):
        return dict.get(self, int(key), 1)

    def __setitem__(self, key, value):
        dict.__setitem__(self, int(key), int(value))
54 \r
# Only OpenID values that look like http(s) URLs are imported.
openidre = re.compile(r'^https?\:\/\/')


def _sx_display_name(sxu):
    """Pick the best available name for a dump user row.

    Preference order is displayname, displaynamecleaned, realname; when none
    of the keys is present an UnknownUser placeholder is returned.
    """
    return sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', UnknownUser())))


def userimport(dump, options):
    """Import the "Users" table of ``dump`` into user records.

    The row whose id is '-1' is skipped. If ``options['owneruid']`` is set,
    the dump user with that id is merged into the existing user with id 1
    (about text appended, reputation and badge counts added) instead of
    being created. Every other row becomes a new user that keeps its dump
    id, together with a SubscriptionSettings record. A name that is already
    taken gets a numeric suffix. An http(s) OpenID value is stored as an
    AuthKeyUserAssociation.

    Parameters:
        dump: object passed to readTable(); must provide ``read(filename)``.
        options: dict-like; only the optional 'owneruid' key is read here.

    Returns:
        (uidmapper, merged_users): an IdMapper from dump user ids to local
        user ids, and the list of local ids that were merged rather than
        created.
    """
    users = readTable(dump, "Users")

    user_by_name = {}
    uidmapper = IdMapper()
    merged_users = []

    owneruid = options.get('owneruid', None)
    # Normalise empty values (e.g. '') to None: no owner account to merge.
    if not owneruid:
        owneruid = None

    for sxu in users:
        create = True

        if sxu.get('id') == '-1':
            continue

        # The None guard matters: int(None) raises TypeError, which used to
        # abort the import whenever no owner uid was supplied.
        if owneruid is not None and int(sxu.get('id')) == int(owneruid):
            osqau = orm.User.objects.get(id=1)
            uidmapper[owneruid] = 1
            uidmapper[-1] = 1
            create = False
        else:
            username = _sx_display_name(sxu)

            # NOTE: merging same-named users by e-mail ("mergesimilar") is
            # not implemented; a clashing name always gets a numeric suffix.
            if not isinstance(username, UnknownUser) and username in user_by_name:
                inc = 1
                while ("%s %d" % (username, inc)) in user_by_name:
                    inc += 1

                username = "%s %d" % (username, inc)

        # 'badgesummary' is a whitespace-separated list of "<class>=<count>"
        # pairs; classes 1/2/3 feed the gold/silver/bronze counters below.
        sxbadges = sxu.get('badgesummary', None)
        badges = {'1':'0','2':'0','3':'0'}

        if sxbadges:
            badges.update(dict([b.split('=') for b in sxbadges.split()]))

        if create:
            birthday = sxu.get('birthday', None)
            if birthday:
                date_of_birth = readTime(birthday)
            else:
                date_of_birth = None

            osqau = orm.User(
                id           = sxu.get('id'),
                username     = unicode(username),
                password     = '!',
                email        = sxu.get('email', ''),
                is_superuser = sxu.get('usertypeid') == '5',
                is_staff     = sxu.get('usertypeid') == '4',
                is_active    = True,
                date_joined  = readTime(sxu.get('creationdate')),
                about         = sxu.get('aboutme', ''),
                date_of_birth = date_of_birth,
                email_isvalid = int(sxu.get('usertypeid')) > 2,
                website       = sxu.get('websiteurl', ''),
                reputation    = int(sxu.get('reputation')),
                gold          = int(badges['1']),
                silver        = int(badges['2']),
                bronze        = int(badges['3']),
                real_name     = sxu.get('realname', ''),
            )

            osqau.save()

            s = orm.SubscriptionSettings(user=osqau)
            s.save()

            uidmapper[osqau.id] = osqau.id
        else:
            # Merge into the existing owner account: append the about text
            # and add reputation/badge counts on top of the current values.
            new_about = sxu.get('aboutme', None)
            if new_about and osqau.about != new_about:
                if osqau.about:
                    osqau.about = "%s\n|\n%s" % (osqau.about, new_about)
                else:
                    osqau.about = new_about

            osqau.username = _sx_display_name(sxu)
            osqau.email = sxu.get('email', '')
            osqau.reputation += int(sxu.get('reputation'))
            osqau.gold += int(badges['1'])
            osqau.silver += int(badges['2'])
            osqau.bronze += int(badges['3'])

            merged_users.append(osqau.id)
            osqau.save()

        user_by_name[osqau.username] = osqau

        openid = sxu.get('openid', None)
        if openid and openidre.match(openid):
            assoc = orm.AuthKeyUserAssociation(user=osqau, key=openid, provider="openidurl")
            assoc.save()

    # Kept from the original; IdMapper already answers 1 for an unmapped -1,
    # so this only triggers if -1 was explicitly mapped to -1.
    if uidmapper[-1] == -1:
        uidmapper[-1] = 1

    return (uidmapper, merged_users)
156 \r
def tagsimport(dump, uidmap):
    """Import the "Tags" table and return the saved tags keyed by name.

    Each tag keeps its dump id; its creator is resolved through ``uidmap``
    (a row without 'userid' is looked up as id 1).
    """
    tagmap = {}

    for row in readTable(dump, "Tags"):
        fields = {
            'id': int(row['id']),
            'name': row['name'],
            'used_count': int(row['count']),
            'created_by_id': uidmap[row.get('userid', 1)],
        }
        tag = orm.Tag(**fields)
        tag.save()

        tagmap[tag.name] = tag

    return tagmap
174 \r
def postimport(dump, uidmap, tagmap):
    """Build (without saving) Question/Answer objects from the "Posts" table.

    Parameters:
        dump: object passed to readTable().
        uidmap: mapping from dump user ids to local user ids.
        tagmap: accepted for interface compatibility; not used here.

    Returns:
        dict mapping int post id to the unsaved post object.
    """
    # PostHistory rows grouped by post id.
    # NOTE(review): this grouping is built but never consulted below.
    history = {}
    for entry in readTable(dump, "PostHistory"):
        history.setdefault(entry.get('postid'), []).append(entry)

    # accepted answer id -> question that accepted it
    accepted = {}
    imported = {}

    for sxpost in readTable(dump, "Posts"):
        is_question = sxpost.get('posttypeid') == '1'
        if is_question:
            postclass = orm.Question
        else:
            postclass = orm.Answer

        post = postclass(
            id = sxpost['id'],
            added_at = readTime(sxpost['creationdate']),
            body = sxpost['body'],
            score = sxpost.get('score', 0),
            vote_up_count = 0,
            vote_down_count = 0
        )

        if not sxpost.get('deletiondate', None):
            post.author_id = uidmap[sxpost['owneruserid']]
        else:
            # Deleted posts are attributed to user 1.
            post.deleted = True
            post.deleted_at = readTime(sxpost['deletiondate'])
            post.author_id = 1

        editor = sxpost.get('lasteditoruserid', None)
        if editor:
            post.last_edited_by_id = uidmap[editor]
            post.last_edited_at = readTime(sxpost['lasteditdate'])

        if sxpost.get('communityowneddate', None):
            post.wiki = True
            post.wikified_at = readTime(sxpost['communityowneddate'])

        if is_question:
            post.node_type = "question"
            post.title = sxpost['title']

            # Replace or drop a few accented characters in tag names.
            tagnames = sxpost['tags'].replace(u'ö', '-').replace(u'é', '').replace(u'à', '')
            post.tagnames = tagnames

            post.view_count = sxpost.get('viewcount', 0)
            post.favourite_count = sxpost.get('favoritecount', 0)
            post.answer_count = sxpost.get('answercount', 0)

            if sxpost.get('lastactivityuserid', None):
                post.last_activity_by_id = uidmap[sxpost['lastactivityuserid']]
                post.last_activity_at = readTime(sxpost['lastactivitydate'])

            if sxpost.get('closeddate', None):
                # NOTE(review): the close time is set to the import time, not
                # the dump's 'closeddate' - confirm this is intended.
                post.closed = True
                post.closed_by_id = 1
                post.closed_at = datetime.now()

            if sxpost.get('acceptedanswerid', None):
                accepted_id = int(sxpost.get('acceptedanswerid'))
                post.accepted_answer_id = accepted_id
                accepted[accepted_id] = post

        else:
            post.node_type = "answer"
            post.parent_id = sxpost['parentid']

            # Only answers whose question was already processed can be
            # recognised as accepted here.
            if int(post.id) in accepted:
                post.accepted = True
                post.accepted_at = datetime.now()
                post.accepted_by_id = accepted[int(post.id)].author_id

        imported[int(post.id)] = post

    return imported
251 \r
def comment_import(dump, uidmap, posts):
    """Add the "PostComments" rows to ``posts`` as unsaved comment nodes.

    Comments get fresh node ids that continue after the highest key in
    ``posts``.

    Parameters:
        dump: object passed to readTable().
        uidmap: mapping from dump user ids to local user ids.
        posts: dict of int node id -> node object; mutated in place.

    Returns:
        (posts, mapping): the same ``posts`` dict, plus a dict from dump
        comment id to the node id assigned to that comment.
    """
    comments = readTable(dump, "PostComments")

    # max() raises ValueError on an empty sequence, so start from 0 when
    # there are no posts.
    if posts:
        currid = max(posts.keys())
    else:
        currid = 0
    mapping = {}

    for sxc in comments:
        currid += 1
        oc = orm.Node(
            id = currid,
            node_type = "comment",
            added_at = readTime(sxc['creationdate']),
            body = sxc['text'],
            parent_id = sxc.get('postid'),
            vote_up_count = 0,
            vote_down_count = 0
        )

        if sxc.get('deletiondate', None):
            # A deleted comment is attributed to the user who deleted it.
            deleter_id = uidmap[sxc['deletionuserid']]
            oc.deleted = True
            oc.deleted_at = readTime(sxc['deletiondate'])
            oc.deleted_by_id = deleter_id
            oc.author_id = deleter_id
        else:
            oc.author_id = uidmap[sxc.get('userid', 1)]

        posts[oc.id] = oc
        mapping[int(sxc['id'])] = int(oc.id)

    return posts, mapping
283 \r
284 \r
def save_posts(posts, tagmap):
    """Save every node in ``posts`` and give each one an initial revision.

    For questions, tag names are resolved through ``tagmap``; names with no
    entry are dropped, and ``tagnames`` is rewritten from the tags that
    remain. The tags are assigned after the first save of the post.
    """
    for post in posts.values():
        post.save()

        if post.node_type == "question":
            known_tags = []
            for tagname in post.tagnames.split():
                tag = tagmap.get(tagname, None)
                if tag is not None:
                    known_tags.append(tag)

            post.tagnames = " ".join([tag.name for tag in known_tags]).strip()
            post.tags = known_tags

        create_and_activate_revision(post)
295 \r
296 \r
def create_and_activate_revision(post):
    """Store revision 1 of ``post`` and mark it as the active revision.

    The revision copies author, body, tag names and title from the post and
    is dated with the post's ``added_at``. The post is saved again after its
    ``active_revision_id`` is set.
    """
    revision_fields = dict(
        author_id=post.author_id,
        body=post.body,
        node_id=post.id,
        revised_at=post.added_at,
        revision=1,
        summary='Initial revision',
        tagnames=post.tagnames,
        title=post.title,
    )
    initial = orm.NodeRevision(**revision_fields)
    initial.save()

    post.active_revision_id = initial.id
    post.save()
312 \r
313 \r
def post_vote_import(dump, uidmap, posts):
    """Import up/down votes on posts from the "Posts2Votes" table.

    Only vote types '2' (up, stored as 1) and '3' (down, stored as -1) are
    imported. Each vote also bumps the matching counter on the in-memory
    post from ``posts``; those posts are not saved here.
    """
    for sxv in readTable(dump, "Posts2Votes"):
        votetype = sxv['votetypeid']
        if votetype not in ('2', '3'):
            continue

        is_upvote = votetype == '2'
        if is_upvote:
            value = 1
        else:
            value = -1

        vote = orm.Vote(
            node_id = sxv['postid'],
            user_id = uidmap[sxv['userid']],
            voted_at = readTime(sxv['creationdate']),
            vote = value,
        )

        target = posts[int(sxv['postid'])]
        if is_upvote:
            target.vote_up_count += 1
        else:
            target.vote_down_count += 1

        vote.save()
332 \r
def comment_vote_import(dump, uidmap, comments, posts):
    """Import up/down votes on comments from the "Comments2Votes" table.

    ``comments`` maps dump comment ids to node ids; ``posts`` maps node ids
    to node objects. Only vote types '2' (up, stored as 1) and '3' (down,
    stored as -1) are imported, and each one bumps the matching counter on
    the in-memory comment node; those nodes are not saved here.
    """
    for sxv in readTable(dump, "Comments2Votes"):
        votetype = sxv['votetypeid']
        if votetype not in ('2', '3'):
            continue

        is_upvote = votetype == '2'
        if is_upvote:
            value = 1
        else:
            value = -1

        node_id = comments[int(sxv['postcommentid'])]
        vote = orm.Vote(
            node_id = node_id,
            user_id = uidmap[sxv['userid']],
            voted_at = readTime(sxv['creationdate']),
            vote = value,
        )

        target = posts[node_id]
        if is_upvote:
            target.vote_up_count += 1
        else:
            target.vote_down_count += 1

        vote.save()
351 \r
352 \r
353 \r
def badges_import(dump, uidmap):
    """Import badge definitions ("Badges") and awards ("Users2Badges").

    A dump badge is matched to an existing badge by slug; when there is no
    match a new badge is created and saved. Awards are built in memory while
    per-badge ``awarded_count`` is incremented, then all badges are saved,
    then all awards. Every award points at object_id 1 of the 'node'
    content type.
    """
    node_ctype = orm['contenttypes.contenttype'].objects.get(name='node')
    obadges = dict((existing.slug, existing) for existing in orm.Badge.objects.all())
    sxbadges = dict((int(row['id']), row) for row in readTable(dump, "Badges"))

    # dump badge id -> local badge object
    sx_to_osqa = {}

    for sx_id, sxb in sxbadges.items():
        # NOTE(review): the lookup slug replaces '&' with 'and', while the
        # slug stored on a newly created badge does not - confirm intended.
        lookup_slug = slugify(sxb['name'].replace('&', 'and'))
        if lookup_slug not in obadges:
            created = orm.Badge(
                name = sxb['name'],
                slug = slugify(sxb['name']),
                description = sxb['description'],
                multiple = sxb.get('single', 'false') == 'false',
                awarded_count = 0,
                type = sxb['class']
            )
            created.save()
            sx_to_osqa[sx_id] = created
        else:
            sx_to_osqa[sx_id] = obadges[lookup_slug]

    pending_awards = []

    for sxa in readTable(dump, "Users2Badges"):
        badge = sx_to_osqa[int(sxa['badgeid'])]
        award = orm.Award(
            user_id = uidmap[sxa['userid']],
            badge = badge,
            content_type = node_ctype,
            object_id = 1
        )

        pending_awards.append(award)
        badge.awarded_count += 1

    # Persist the updated award counts before the awards themselves.
    for badge in sx_to_osqa.values():
        badge.save()

    for award in pending_awards:
        award.save()
397 \r
398 \r
def reset_sequences():
    """Run PG_SEQUENCE_RESETS in one transaction on a "postgres" backend.

    Does nothing for any other backend name.
    """
    from south.db import db

    if db.backend_name != "postgres":
        return

    db.start_transaction()
    db.execute_many(PG_SEQUENCE_RESETS)
    db.commit_transaction()
405 \r
def sximport(dump, options):
    """Run the whole import: users, tags, posts, comments, votes, badges.

    The phases run in a fixed order because later ones rely on the id maps
    and in-memory nodes produced by earlier ones. Afterwards the current
    transaction is committed and the database sequences are reset.
    """
    user_ids, merged_users = userimport(dump, options)
    tags_by_name = tagsimport(dump, user_ids)

    nodes = postimport(dump, user_ids, tags_by_name)
    nodes, comment_ids = comment_import(dump, user_ids, nodes)
    save_posts(nodes, tags_by_name)

    # The vote imports only update counters on the in-memory nodes, so every
    # node is saved once more afterwards.
    post_vote_import(dump, user_ids, nodes)
    comment_vote_import(dump, user_ids, comment_ids, nodes)
    for node in nodes.values():
        node.save()

    badges_import(dump, user_ids)

    # NOTE(review): the matching start of this transaction is not in this
    # function - presumably opened by the caller; confirm.
    from south.db import db
    db.commit_transaction()

    reset_sequences()
422 \r
423     \r
424     \r
# One setval() statement per table. Each sets the table's id sequence to
# coalesce(max("id"), 1) + 2, and flags the value as already used only when
# the table has rows. Every sequence is listed exactly once; the repeated
# forum_node_tags statements were redundant and have been removed.
PG_SEQUENCE_RESETS = """
SELECT setval('"auth_user_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user";
SELECT setval('"auth_user_groups_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_groups";
SELECT setval('"auth_user_user_permissions_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_user_permissions";
SELECT setval('"activity_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "activity";
SELECT setval('"forum_subscriptionsettings_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_subscriptionsettings";
SELECT setval('"forum_validationhash_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_validationhash";
SELECT setval('"forum_authkeyuserassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_authkeyuserassociation";
SELECT setval('"tag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "tag";
SELECT setval('"forum_markedtag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_markedtag";
SELECT setval('"forum_node_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node";
SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
SELECT setval('"forum_noderevision_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_noderevision";
SELECT setval('"favorite_question_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "favorite_question";
SELECT setval('"forum_questionsubscription_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_questionsubscription";
SELECT setval('"vote_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "vote";
SELECT setval('"flagged_item_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "flagged_item";
SELECT setval('"badge_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "badge";
SELECT setval('"award_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "award";
SELECT setval('"repute_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "repute";
SELECT setval('"forum_keyvalue_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_keyvalue";
SELECT setval('"forum_openidnonce_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidnonce";
SELECT setval('"forum_openidassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidassociation";
"""
453 \r
454 \r
455     \r
456