git.openstreetmap.org Git - osqa.git/blob - forum_modules/sximporter/importer.py
the importer was not loading accepted answers correctly
[osqa.git] / forum_modules / sximporter / importer.py
1 # -*- coding: utf-8 -*-\r
2 \r
3 from xml.dom import minidom\r
4 from datetime import datetime\r
5 import time\r
6 import re\r
7 from django.utils.translation import ugettext as _\r
8 from django.template.defaultfilters import slugify\r
9 from orm import orm\r
10 \r
def getText(el):
    """Return the stripped text held directly inside ``el``.

    Only immediate children whose ``nodeType`` is ``TEXT_NODE`` contribute;
    text nested inside child elements is not included.  The pieces are
    concatenated in document order and surrounding whitespace is removed.
    """
    # Collect the pieces and join once rather than growing a string in a loop.
    pieces = [node.data for node in el.childNodes
              if node.nodeType == node.TEXT_NODE]
    return "".join(pieces).strip()
17 \r
# Captures everything that precedes a ".<digits>" fractional-seconds part.
msstrip = re.compile(r'^(.*)\.\d+')


def readTime(ts):
    """Convert a ``YYYY-MM-DDTHH:MM:SS`` timestamp string into a ``datetime``.

    If the string contains a ``.<digits>`` part, only the text before it is
    parsed, so fractional seconds are discarded.
    """
    fractional = msstrip.match(ts)
    if fractional is not None:
        ts = fractional.group(1)

    parsed = time.strptime(ts, '%Y-%m-%dT%H:%M:%S')
    # The first six struct_time fields are year, month, day, hour, min, sec.
    return datetime(*parsed[:6])
25 \r
def readEl(el):
    """Turn the child elements of ``el`` into a ``{tag name: text}`` dict.

    Keys are the lower-cased tag names of the element children; values are
    the result of ``getText`` on each child.  Non-element children are
    skipped, and a repeated tag name keeps the value of its last occurrence.
    """
    fields = {}
    for child in el.childNodes:
        if child.nodeType != el.ELEMENT_NODE:
            continue
        fields[child.tagName.lower()] = getText(child)
    return fields
28 \r
def readTable(dump, name):
    """Load ``<name>.xml`` from ``dump`` and return one dict per ``row`` element.

    ``dump`` must provide ``read(filename)`` returning the XML document
    (presumably a zip archive of the export -- confirm against the caller).
    Each row is converted with ``readEl``.
    """
    xml = dump.read("%s.xml" % name)
    rows = minidom.parseString(xml).getElementsByTagName('row')
    return [readEl(row) for row in rows]
31 \r
class UnknownUser(object):
    """Numbered placeholder used as the name of a user that has none.

    Every instance takes the next value of a class-wide counter, so each
    placeholder renders with a distinct number.
    """

    # Number of instances created so far; shared by all instances.
    counter = 0

    def __init__(self):
        UnknownUser.counter = UnknownUser.counter + 1
        self.number = UnknownUser.counter

    def __str__(self):
        template = _("Unknown user %(number)d")
        return template % {'number': self.number}

    def __unicode__(self):
        return self.__str__()

    def encode(self, *args):
        # Arguments are ignored; presumably present so code that calls
        # ``.encode(...)`` on a user name also works with a placeholder.
        return self.__str__()
46 \r
class IdMapper(dict):
    """Dict of integer ids in which unmapped ids map to themselves.

    Keys and values are coerced with ``int()`` on assignment and keys are
    coerced on item lookup, so ``'5'`` and ``5`` address the same entry.
    """

    def __getitem__(self, key):
        wanted = int(key)
        try:
            return dict.__getitem__(self, wanted)
        except KeyError:
            # No explicit mapping: the id is returned unchanged.
            return wanted

    def __setitem__(self, key, value):
        dict.__setitem__(self, int(key), int(value))
54 \r
# Matches keys that look like an http(s) URL.
openidre = re.compile(r'^https?://')


def _sx_username(sxu):
    """Pick the name to use for an exported user row.

    Preference order: ``displayname``, ``displaynamecleaned``, ``realname``,
    then a fresh ``UnknownUser`` placeholder.
    """
    # NOTE: the nested defaults are evaluated eagerly, so an UnknownUser is
    # instantiated (and its counter advanced) on every call, exactly as the
    # inline expression this helper replaces did.
    return sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', UnknownUser())))


def userimport(dump, options):
    """Import the ``Users`` table of the dump.

    Parameters:
        dump    -- object passed to ``readTable`` to load ``Users.xml``.
        options -- dict-like; the keys read are ``owneruid`` (id of the
                   exported user to merge into the existing user with id 1;
                   missing or empty means no such user) and ``mergesimilar``
                   (merge users sharing both name and e-mail).

    Returns a tuple ``(uidmapper, merged_users)``: an ``IdMapper`` from
    exported user ids to local user ids, and the list of local ids of users
    that received merged data.  The exported id -1 is skipped as a user and
    mapped to local user 1.
    """
    users = readTable(dump, "Users")

    user_by_name = {}
    uidmapper = IdMapper()
    merged_users = []

    # Normalise the owner id once.  A missing or empty option means "no
    # owner"; converting it inside the loop used to call int(None) and raise
    # TypeError on the first user.
    owneruid = options.get('owneruid', None)
    if owneruid:
        owneruid = int(owneruid)
    else:
        owneruid = None

    for sxu in users:
        create = True

        if sxu.get('id') == '-1':
            continue

        if owneruid is not None and int(sxu.get('id')) == owneruid:
            # The designated owner is folded into the existing user 1.
            osqau = orm.User.objects.get(id=1)
            uidmapper[owneruid] = 1
            uidmapper[-1] = 1
            create = False
        else:
            username = _sx_username(sxu)

            if not isinstance(username, UnknownUser) and username in user_by_name:
                if options.get('mergesimilar', False) and sxu.get('email', 'INVALID') == user_by_name[username].email:
                    # Same name and same e-mail: merge into the earlier user.
                    osqau = user_by_name[username]
                    create = False
                    uidmapper[sxu.get('id')] = osqau.id
                else:
                    # Name clash: append the first free numeric suffix.
                    inc = 1
                    while ("%s %d" % (username, inc)) in user_by_name:
                        inc += 1

                    username = "%s %d" % (username, inc)

        # 'badgesummary' is parsed as whitespace-separated "<class>=<count>"
        # pairs; classes '1', '2', '3' are stored as gold, silver, bronze.
        sxbadges = sxu.get('badgesummary', None)
        badges = {'1':'0','2':'0','3':'0'}

        if sxbadges:
            badges.update(dict([b.split('=') for b in sxbadges.split()]))

        if create:
            osqau = orm.User(
                id           = sxu.get('id'),
                username     = unicode(username),
                password     = '!',
                email        = sxu.get('email', ''),
                is_superuser = sxu.get('usertypeid') == '5',
                is_staff     = sxu.get('usertypeid') == '4',
                is_active    = True,
                date_joined  = readTime(sxu.get('creationdate')),
                about         = sxu.get('aboutme', ''),
                date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None,
                email_isvalid = int(sxu.get('usertypeid')) > 2,
                website       = sxu.get('websiteurl', ''),
                reputation    = int(sxu.get('reputation')),
                gold          = int(badges['1']),
                silver        = int(badges['2']),
                bronze        = int(badges['3']),
                real_name     = sxu.get('realname', ''),
            )

            osqau.save()

            s = orm.SubscriptionSettings(user=osqau)
            s.save()

            user_by_name[osqau.username] = osqau
        else:
            # Merge into an existing user: append the "about" text when it
            # differs, overwrite name and e-mail, add reputation and badges.
            new_about = sxu.get('aboutme', None)
            if new_about and osqau.about != new_about:
                if osqau.about:
                    osqau.about = "%s\n|\n%s" % (osqau.about, new_about)
                else:
                    osqau.about = new_about

            osqau.username = _sx_username(sxu)
            osqau.email = sxu.get('email', '')
            osqau.reputation += int(sxu.get('reputation'))
            osqau.gold += int(badges['1'])
            osqau.silver += int(badges['2'])
            osqau.bronze += int(badges['3'])

            merged_users.append(osqau.id)
            osqau.save()


        openid = sxu.get('openid', None)
        if openid and openidre.match(openid):
            assoc = orm.AuthKeyUserAssociation(user=osqau, key=openid, provider="openidurl")
            assoc.save()

    # IdMapper returns the key itself when unmapped, so -1 here means that
    # no owner was matched above; fall back to local user 1.
    if uidmapper[-1] == -1:
        uidmapper[-1] = 1

    return (uidmapper, merged_users)
155 \r
def tagsimport(dump, uidmap):
    """Import the ``Tags`` table and return a ``{tag name: tag object}`` dict.

    Every row is saved as an ``orm.Tag`` that keeps the exported id.  The
    creator is looked up through ``uidmap``; rows without a ``userid`` use
    the key 1.
    """
    by_name = {}

    for row in readTable(dump, "Tags"):
        fields = {
            'id': int(row['id']),
            'name': row['name'],
            'used_count': int(row['count']),
            'created_by_id': uidmap[row.get('userid', 1)],
        }
        tag = orm.Tag(**fields)
        tag.save()

        by_name[tag.name] = tag

    return by_name
173 \r
def postimport(dump, uidmap, tagmap):
    """Build (without saving) question and answer objects from ``Posts``.

    Parameters:
        dump   -- object passed to ``readTable`` to load ``Posts.xml``.
        uidmap -- mapping from exported user ids to local user ids.
        tagmap -- accepted for interface compatibility; not used here.

    Returns a dict mapping the integer post id to the unsaved post object.
    Rows with ``posttypeid`` '1' become ``orm.Question``; every other row
    becomes ``orm.Answer``.
    """
    accepted = {}   # accepted answer id -> question that accepted it
    imported = {}   # post id -> unsaved post object

    for sxpost in readTable(dump, "Posts"):
        is_question = sxpost.get('posttypeid') == '1'
        if is_question:
            postclass = orm.Question
        else:
            postclass = orm.Answer

        post = postclass(
            id = sxpost['id'],
            added_at = readTime(sxpost['creationdate']),
            body = sxpost['body'],
            score = sxpost.get('score', 0),
            vote_up_count = 0,
            vote_down_count = 0
        )

        if sxpost.get('deletiondate', None):
            post.deleted = True
            post.deleted_at = readTime(sxpost['deletiondate'])
            # Deleted posts are attributed to local user 1.
            post.author_id = 1
        else:
            post.author_id = uidmap[sxpost['owneruserid']]

        if sxpost.get('lasteditoruserid', None):
            post.last_edited_by_id = uidmap[sxpost.get('lasteditoruserid')]
            post.last_edited_at = readTime(sxpost['lasteditdate'])

        if sxpost.get('communityowneddate', None):
            post.wiki = True
            post.wikified_at = readTime(sxpost['communityowneddate'])

        if is_question:
            post.node_type = "question"
            post.title = sxpost['title']

            tagnames = sxpost['tags'].replace(u'ö', '-').replace(u'é', '').replace(u'à', '')
            post.tagnames = tagnames

            post.view_count = sxpost.get('viewcount', 0)
            post.favourite_count = sxpost.get('favoritecount', 0)
            post.answer_count = sxpost.get('answercount', 0)

            if sxpost.get('lastactivityuserid', None):
                post.last_activity_by_id = uidmap[sxpost['lastactivityuserid']]
                post.last_activity_at = readTime(sxpost['lastactivitydate'])

            if sxpost.get('closeddate', None):
                post.closed = True
                post.closed_by_id = 1
                # Use the close date from the dump rather than the time at
                # which the import happens to run.
                post.closed_at = readTime(sxpost['closeddate'])

            if sxpost.get('acceptedanswerid', None):
                post.accepted_answer_id = int(sxpost.get('acceptedanswerid'))
                accepted[int(sxpost.get('acceptedanswerid'))] = post

        else:
            post.node_type = "answer"
            post.parent_id = sxpost['parentid']

        imported[int(post.id)] = post

    # Mark accepted answers in a second pass so the result does not depend
    # on a question row appearing before its accepted answer.
    for answer_id, question in accepted.items():
        answer = imported.get(answer_id)
        if answer is None or answer.node_type != "answer":
            continue

        answer.accepted = True
        # Only the accepted answer id is read from the dump, so the import
        # time is recorded as the acceptance time.
        answer.accepted_at = datetime.now()
        answer.accepted_by_id = question.author_id

    return imported
250 \r
def comment_import(dump, uidmap, posts):
    """Create (without saving) comment nodes from ``PostComments``.

    Comments get fresh ids that continue after the highest key in ``posts``
    (starting at 1 when ``posts`` is empty) and are added to ``posts`` in
    place.

    Returns ``(posts, mapping)`` where ``mapping`` maps each exported comment
    id to the new node id.
    """
    comments = readTable(dump, "PostComments")

    # max() raises ValueError on an empty sequence, so guard the empty case.
    currid = 0
    if posts:
        currid = max(posts.keys())
    mapping = {}

    for sxc in comments:
        currid += 1
        oc = orm.Node(
            id = currid,
            node_type = "comment",
            added_at = readTime(sxc['creationdate']),
            author_id = uidmap[sxc['userid']],
            body = sxc['text'],
            parent_id = sxc.get('postid'),
            vote_up_count = 0,
            vote_down_count = 0
        )

        if sxc.get('deletiondate', None):
            oc.deleted = True
            oc.deleted_at = readTime(sxc['deletiondate'])
            oc.deleted_by_id = uidmap[sxc['deletionuserid']]
            # NOTE(review): a deleted comment is re-attributed to the user
            # who deleted it -- confirm this is intended.
            oc.author_id = uidmap[sxc['deletionuserid']]

        posts[oc.id] = oc
        mapping[int(sxc['id'])] = int(oc.id)

    return posts, mapping
282 \r
283 \r
def save_posts(posts, tagmap):
    """Save every node in ``posts`` and give each an initial revision.

    For questions, the names in ``tagnames`` are resolved through ``tagmap``;
    names without an entry are dropped, ``tagnames`` is rewritten from the
    resolved tags and the tag objects are assigned to ``post.tags``.
    """
    for post in posts.values():
        # Saved before the tags are assigned (presumably the relation needs
        # a stored row -- confirm against the model).
        post.save()

        if post.node_type == "question":
            # Build a real list: it is iterated for the names and then
            # assigned to post.tags, which a one-shot iterator cannot serve.
            found = [tagmap.get(n, None) for n in post.tagnames.split()]
            tags = [t for t in found if t is not None]
            post.tagnames = " ".join([t.name for t in tags]).strip()
            post.tags = tags

        create_and_activate_revision(post)
294 \r
295 \r
def create_and_activate_revision(post):
    """Store revision 1 of ``post`` and make it the post's active revision.

    The revision copies author, body, tag names and title from the post and
    is dated with the post's ``added_at``.  The post is saved again once
    ``active_revision_id`` has been set.
    """
    fields = {
        'author_id': post.author_id,
        'body': post.body,
        'node_id': post.id,
        'revised_at': post.added_at,
        'revision': 1,
        'summary': 'Initial revision',
        'tagnames': post.tagnames,
        'title': post.title,
    }
    initial = orm.NodeRevision(**fields)
    initial.save()

    post.active_revision_id = initial.id
    post.save()
311 \r
312 \r
def post_vote_import(dump, uidmap, posts):
    """Import up/down votes on posts from the ``Posts2Votes`` table.

    Only rows with ``votetypeid`` '2' (stored as +1) or '3' (stored as -1)
    are imported.  Each one is saved as an ``orm.Vote`` and counted on the
    matching object in ``posts``; those objects are not saved here.
    """
    for row in readTable(dump, "Posts2Votes"):
        kind = row['votetypeid']
        if kind not in ('2', '3'):
            continue

        is_up = kind == '2'
        if is_up:
            value = 1
        else:
            value = -1

        vote = orm.Vote(
            node_id = row['postid'],
            user_id = uidmap[row['userid']],
            voted_at = readTime(row['creationdate']),
            vote = value,
        )

        target = posts[int(row['postid'])]
        if is_up:
            target.vote_up_count += 1
        else:
            target.vote_down_count += 1

        vote.save()
331 \r
def comment_vote_import(dump, uidmap, comments, posts):
    """Import up/down votes on comments from the ``Comments2Votes`` table.

    ``comments`` maps exported comment ids to node ids, and ``posts`` holds
    the node objects keyed by node id.  Only rows with ``votetypeid`` '2'
    (stored as +1) or '3' (stored as -1) are imported; the counters on the
    node objects are updated but the nodes are not saved here.
    """
    for row in readTable(dump, "Comments2Votes"):
        kind = row['votetypeid']
        if kind not in ('2', '3'):
            continue

        is_up = kind == '2'
        if is_up:
            value = 1
        else:
            value = -1

        node_id = comments[int(row['postcommentid'])]
        vote = orm.Vote(
            node_id = node_id,
            user_id = uidmap[row['userid']],
            voted_at = readTime(row['creationdate']),
            vote = value,
        )

        target = posts[node_id]
        if is_up:
            target.vote_up_count += 1
        else:
            target.vote_down_count += 1

        vote.save()
350 \r
351 \r
352 \r
def badges_import(dump, uidmap):
    """Import badge definitions (``Badges``) and awards (``Users2Badges``).

    An exported badge is matched to an existing ``orm.Badge`` by slug; when
    none matches, a new badge is created.  One ``orm.Award`` is created per
    award row and the badge's ``awarded_count`` is incremented for each.
    Badges are saved before awards.
    """
    node_ctype = orm['contenttypes.contenttype'].objects.get(name='node')
    obadges = dict([(b.slug, b) for b in orm.Badge.objects.all()])
    sxbadges = dict([(int(b['id']), b) for b in readTable(dump, "Badges")])

    sx_to_osqa = {}

    # 'badge_id' rather than 'id', to avoid shadowing the builtin.
    for badge_id, sxb in sxbadges.items():
        slug = slugify(sxb['name'].replace('&', 'and'))
        if slug in obadges:
            sx_to_osqa[badge_id] = obadges[slug]
        else:
            # NOTE(review): the lookup slug replaces '&' with 'and' but the
            # stored slug does not; kept as is -- confirm which is intended.
            osqab = orm.Badge(
                name = sxb['name'],
                slug = slugify(sxb['name']),
                description = sxb['description'],
                multiple = sxb.get('single', 'false') == 'false',
                awarded_count = 0,
                type = sxb['class']
            )
            osqab.save()
            sx_to_osqa[badge_id] = osqab

    sxawards = readTable(dump, "Users2Badges")
    osqaawards = []

    for sxa in sxawards:
        badge = sx_to_osqa[int(sxa['badgeid'])]
        osqaa = orm.Award(
            user_id = uidmap[sxa['userid']],
            badge = badge,
            content_type = node_ctype,
            # Every award is attached to object id 1.
            object_id = 1
        )

        osqaawards.append(osqaa)
        badge.awarded_count += 1

    # Persist the updated award counters first, then the awards.
    for b in sx_to_osqa.values():
        b.save()

    for a in osqaawards:
        a.save()
396 \r
397 \r
def reset_sequences():
    """Run ``PG_SEQUENCE_RESETS`` in a transaction on the postgres backend.

    Does nothing when ``db.backend_name`` is anything other than "postgres".
    """
    from south.db import db

    if db.backend_name != "postgres":
        return

    db.start_transaction()
    db.execute_many(PG_SEQUENCE_RESETS)
    db.commit_transaction()
404 \r
def sximport(dump, options):
    """Run the whole import: users, tags, posts, comments, votes, badges.

    ``dump`` and ``options`` are handed to the individual import steps.
    After the data is written the South transaction is committed and the
    id sequences are reset.
    """
    uidmap, merged_users = userimport(dump, options)
    tagmap = tagsimport(dump, uidmap)

    questions_and_answers = postimport(dump, uidmap, tagmap)
    nodes, comment_ids = comment_import(dump, uidmap, questions_and_answers)
    save_posts(nodes, tagmap)

    post_vote_import(dump, uidmap, nodes)
    comment_vote_import(dump, uidmap, comment_ids, nodes)

    # The vote imports changed counters on nodes that were already saved,
    # so save them again.
    for node in nodes.values():
        node.save()

    badges_import(dump, uidmap)

    from south.db import db
    db.commit_transaction()

    reset_sequences()
421 \r
422     \r
423     \r
# SQL executed by reset_sequences() on the postgres backend.  Each statement
# calls setval() on one table's id sequence, using that table's current
# max("id").  One statement per sequence: the forum_node_tags reset used to
# be listed five times, and repeating the identical statement adds nothing.
PG_SEQUENCE_RESETS = """
SELECT setval('"auth_user_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user";
SELECT setval('"auth_user_groups_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_groups";
SELECT setval('"auth_user_user_permissions_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_user_permissions";
SELECT setval('"activity_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "activity";
SELECT setval('"forum_subscriptionsettings_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_subscriptionsettings";
SELECT setval('"forum_validationhash_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_validationhash";
SELECT setval('"forum_authkeyuserassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_authkeyuserassociation";
SELECT setval('"tag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "tag";
SELECT setval('"forum_markedtag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_markedtag";
SELECT setval('"forum_node_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node";
SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
SELECT setval('"forum_noderevision_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_noderevision";
SELECT setval('"favorite_question_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "favorite_question";
SELECT setval('"forum_questionsubscription_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_questionsubscription";
SELECT setval('"vote_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "vote";
SELECT setval('"flagged_item_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "flagged_item";
SELECT setval('"badge_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "badge";
SELECT setval('"award_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "award";
SELECT setval('"repute_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "repute";
SELECT setval('"forum_keyvalue_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_keyvalue";
SELECT setval('"forum_openidnonce_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidnonce";
SELECT setval('"forum_openidassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidassociation";
"""
452 \r
453 \r
454     \r
455