1 # -*- coding: utf-8 -*-
\r
3 from xml.dom import minidom
\r
4 from datetime import datetime
\r
7 from django.utils.translation import ugettext as _
\r
8 from django.template.defaultfilters import slugify
\r
13 for node in el.childNodes:
\r
14 if node.nodeType == node.TEXT_NODE:
\r
18 msstrip = re.compile(r'^(.*)\.\d+')
\r
20 noms = msstrip.match(ts)
\r
24 return datetime(*time.strptime(ts, '%Y-%m-%dT%H:%M:%S')[0:6])
\r
27 return dict([(n.tagName.lower(), getText(n)) for n in el.childNodes if n.nodeType == el.ELEMENT_NODE])
\r
def readTable(dump, name):
    """Load one table of the dump as a list of converted ``row`` elements.

    ``dump`` must provide ``read(filename)`` returning the XML content of
    ``"<name>.xml"``. Every ``row`` element in that document is passed through
    ``readEl`` and the results are returned in document order.
    """
    xml_source = dump.read("%s.xml" % name)
    document = minidom.parseString(xml_source)

    rows = []
    for row_el in document.getElementsByTagName('row'):
        rows.append(readEl(row_el))
    return rows
\r
32 class UnknownUser(object):
\r
35 UnknownUser.counter += 1
\r
36 self.number = UnknownUser.counter
\r
39 return _("Unknown user %(number)d") % {'number': self.number}
\r
41 def __unicode__(self):
\r
42 return self.__str__()
\r
44 def encode(self, *args):
\r
45 return self.__str__()
\r
47 class IdMapper(dict):
\r
48 def __getitem__(self, key):
\r
50 return super(IdMapper, self).get(key, 1)
\r
52 def __setitem__(self, key, value):
\r
53 super(IdMapper, self).__setitem__(int(key), int(value))
\r
# Recognises OpenID keys given as URLs, i.e. values starting with http:// or
# https://. Written as a raw string without backslashes: ':' and '/' are not
# special in a regex, and '\:' / '\/' are invalid escapes in a plain string
# literal (newer Python versions warn about them).
openidre = re.compile(r'^https?://')
\r
56 def userimport(dump, options):
\r
57 users = readTable(dump, "Users")
\r
60 uidmapper = IdMapper()
\r
63 owneruid = options.get('owneruid', None)
\r
64 #check for empty values
\r
71 if sxu.get('id') == '-1':
\r
74 if int(sxu.get('id')) == int(owneruid):
\r
75 osqau = orm.User.objects.get(id=1)
\r
76 uidmapper[owneruid] = 1
\r
80 username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', UnknownUser())))
\r
82 if not isinstance(username, UnknownUser) and username in user_by_name:
\r
83 #if options.get('mergesimilar', False) and sxu.get('email', 'INVALID') == user_by_name[username].email:
\r
84 # osqau = user_by_name[username]
\r
86 # uidmapper[sxu.get('id')] = osqau.id
\r
89 while ("%s %d" % (username, inc)) in user_by_name:
\r
92 username = "%s %d" % (username, inc)
\r
94 sxbadges = sxu.get('badgesummary', None)
\r
95 badges = {'1':'0','2':'0','3':'0'}
\r
98 badges.update(dict([b.split('=') for b in sxbadges.split()]))
\r
102 id = sxu.get('id'),
\r
103 username = unicode(username),
\r
105 email = sxu.get('email', ''),
\r
106 is_superuser = sxu.get('usertypeid') == '5',
\r
107 is_staff = sxu.get('usertypeid') == '4',
\r
109 date_joined = readTime(sxu.get('creationdate')),
\r
110 about = sxu.get('aboutme', ''),
\r
111 date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None,
\r
112 email_isvalid = int(sxu.get('usertypeid')) > 2,
\r
113 website = sxu.get('websiteurl', ''),
\r
114 reputation = int(sxu.get('reputation')),
\r
115 gold = int(badges['1']),
\r
116 silver = int(badges['2']),
\r
117 bronze = int(badges['3']),
\r
118 real_name = sxu.get('realname', ''),
\r
124 orm.SubscriptionSettings.objects.get(user=osqau)
\r
126 s = orm.SubscriptionSettings(user=osqau)
\r
129 uidmapper[osqau.id] = osqau.id
\r
131 new_about = sxu.get('aboutme', None)
\r
132 if new_about and osqau.about != new_about:
\r
134 osqau.about = "%s\n|\n%s" % (osqau.about, new_about)
\r
136 osqau.about = new_about
\r
138 osqau.username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', UnknownUser())))
\r
139 osqau.email = sxu.get('email', '')
\r
140 osqau.reputation += int(sxu.get('reputation'))
\r
141 osqau.gold += int(badges['1'])
\r
142 osqau.silver += int(badges['2'])
\r
143 osqau.bronze += int(badges['3'])
\r
145 merged_users.append(osqau.id)
\r
148 user_by_name[osqau.username] = osqau
\r
150 openid = sxu.get('openid', None)
\r
151 if openid and openidre.match(openid):
\r
152 assoc = orm.AuthKeyUserAssociation(user=osqau, key=openid, provider="openidurl")
\r
155 if uidmapper[-1] == -1:
\r
158 return (uidmapper, merged_users)
\r
160 def tagsimport(dump, uidmap):
\r
161 tags = readTable(dump, "Tags")
\r
167 id = int(sxtag['id']),
\r
168 name = sxtag['name'],
\r
169 used_count = int(sxtag['count']),
\r
170 created_by_id = uidmap[sxtag.get('userid', 1)],
\r
174 tagmap[otag.name] = otag
\r
178 def postimport(dump, uidmap, tagmap):
\r
183 for h in readTable(dump, "PostHistory"):
\r
184 if not history.get(h.get('postid'), None):
\r
185 history[h.get('postid')] = []
\r
187 history[h.get('postid')].append(h)
\r
189 posts = readTable(dump, "Posts")
\r
191 for sxpost in posts:
\r
192 postclass = sxpost.get('posttypeid') == '1' and orm.Question or orm.Answer
\r
196 added_at = readTime(sxpost['creationdate']),
\r
197 body = sxpost['body'],
\r
198 score = sxpost.get('score', 0),
\r
200 vote_down_count = 0
\r
203 if sxpost.get('deletiondate', None):
\r
204 post.deleted = True
\r
205 post.deleted_at = readTime(sxpost['deletiondate'])
\r
208 post.author_id = uidmap[sxpost['owneruserid']]
\r
210 if sxpost.get('lasteditoruserid', None):
\r
211 post.last_edited_by_id = uidmap[sxpost.get('lasteditoruserid')]
\r
212 post.last_edited_at = readTime(sxpost['lasteditdate'])
\r
214 if sxpost.get('communityowneddate', None):
\r
216 post.wikified_at = readTime(sxpost['communityowneddate'])
\r
218 if sxpost.get('posttypeid') == '1': #question
\r
219 post.node_type = "question"
\r
220 post.title = sxpost['title']
\r
222 tagnames = sxpost['tags'].replace(u'ö', '-').replace(u'é', '').replace(u'à', '')
\r
223 post.tagnames = tagnames
\r
225 post.view_count = sxpost.get('viewcount', 0)
\r
226 post.favourite_count = sxpost.get('favoritecount', 0)
\r
227 post.answer_count = sxpost.get('answercount', 0)
\r
229 if sxpost.get('lastactivityuserid', None):
\r
230 post.last_activity_by_id = uidmap[sxpost['lastactivityuserid']]
\r
231 post.last_activity_at = readTime(sxpost['lastactivitydate'])
\r
233 if sxpost.get('closeddate', None):
\r
235 post.closed_by_id = 1
\r
236 post.closed_at = datetime.now()
\r
238 if sxpost.get('acceptedanswerid', None):
\r
239 post.accepted_answer_id = int(sxpost.get('acceptedanswerid'))
\r
240 accepted[int(sxpost.get('acceptedanswerid'))] = post
\r
243 post.node_type = "answer"
\r
244 post.parent_id = sxpost['parentid']
\r
246 if int(post.id) in accepted:
\r
247 post.accepted = True
\r
248 post.accepted_at = datetime.now()
\r
249 post.accepted_by_id = accepted[int(post.id)].author_id
\r
251 all[int(post.id)] = post
\r
255 def comment_import(dump, uidmap, posts):
\r
256 comments = readTable(dump, "PostComments")
\r
257 currid = max(posts.keys())
\r
260 for sxc in comments:
\r
264 node_type = "comment",
\r
265 added_at = readTime(sxc['creationdate']),
\r
266 author_id = uidmap[sxc.get('userid', 1)],
\r
267 body = sxc['text'],
\r
268 parent_id = sxc.get('postid'),
\r
270 vote_down_count = 0
\r
273 if sxc.get('deletiondate', None):
\r
275 oc.deleted_at = readTime(sxc['deletiondate'])
\r
276 oc.deleted_by_id = uidmap[sxc['deletionuserid']]
\r
277 oc.author_id = uidmap[sxc['deletionuserid']]
\r
279 oc.author_id = uidmap[sxc.get('userid', 1)]
\r
283 mapping[int(sxc['id'])] = int(oc.id)
\r
285 return posts, mapping
\r
288 def save_posts(posts, tagmap):
\r
289 for post in posts.values():
\r
292 if post.node_type == "question":
\r
293 tags = filter(lambda t: t is not None, [tagmap.get(n, None) for n in post.tagnames.split()])
\r
294 post.tagnames = " ".join([t.name for t in tags]).strip()
\r
297 create_and_activate_revision(post)
\r
300 def create_and_activate_revision(post):
\r
301 rev = orm.NodeRevision(
\r
302 author_id = post.author_id,
\r
305 revised_at = post.added_at,
\r
307 summary = 'Initial revision',
\r
308 tagnames = post.tagnames,
\r
309 title = post.title,
\r
313 post.active_revision_id = rev.id
\r
317 def post_vote_import(dump, uidmap, posts):
\r
318 votes = readTable(dump, "Posts2Votes")
\r
321 if sxv['votetypeid'] in ('2', '3'):
\r
323 node_id = sxv['postid'],
\r
324 user_id = uidmap[sxv['userid']],
\r
325 voted_at = readTime(sxv['creationdate']),
\r
326 vote = sxv['votetypeid'] == '2' and 1 or -1,
\r
329 if sxv['votetypeid'] == '2':
\r
330 posts[int(sxv['postid'])].vote_up_count += 1
\r
332 posts[int(sxv['postid'])].vote_down_count += 1
\r
336 def comment_vote_import(dump, uidmap, comments, posts):
\r
337 votes = readTable(dump, "Comments2Votes")
\r
340 if sxv['votetypeid'] in ('2', '3'):
\r
342 node_id = comments[int(sxv['postcommentid'])],
\r
343 user_id = uidmap[sxv['userid']],
\r
344 voted_at = readTime(sxv['creationdate']),
\r
345 vote = sxv['votetypeid'] == '2' and 1 or -1,
\r
348 if sxv['votetypeid'] == '2':
\r
349 posts[comments[int(sxv['postcommentid'])]].vote_up_count += 1
\r
351 posts[comments[int(sxv['postcommentid'])]].vote_down_count += 1
\r
357 def badges_import(dump, uidmap):
\r
358 node_ctype = orm['contenttypes.contenttype'].objects.get(name='node')
\r
359 obadges = dict([(b.slug, b) for b in orm.Badge.objects.all()])
\r
360 sxbadges = dict([(int(b['id']), b) for b in readTable(dump, "Badges")])
\r
364 for id, sxb in sxbadges.items():
\r
365 slug = slugify(sxb['name'].replace('&', 'and'))
\r
366 if slug in obadges:
\r
367 sx_to_osqa[id] = obadges[slug]
\r
370 name = sxb['name'],
\r
371 slug = slugify(sxb['name']),
\r
372 description = sxb['description'],
\r
373 multiple = sxb.get('single', 'false') == 'false',
\r
375 type = sxb['class']
\r
378 sx_to_osqa[id] = osqab
\r
380 sxawards = readTable(dump, "Users2Badges")
\r
383 for sxa in sxawards:
\r
384 badge = sx_to_osqa[int(sxa['badgeid'])]
\r
386 user_id = uidmap[sxa['userid']],
\r
388 content_type = node_ctype,
\r
392 osqaawards.append(osqaa)
\r
393 badge.awarded_count += 1
\r
395 for b in sx_to_osqa.values():
\r
398 for a in osqaawards:
\r
def reset_sequences():
    """Run the PG_SEQUENCE_RESETS statements on a PostgreSQL backend.

    The statements are executed through South's ``db`` object inside their
    own transaction. Nothing happens when South reports a backend other than
    "postgres".

    Raises:
        Any exception raised by ``db.execute_many``. The transaction opened
        here is rolled back before the exception propagates, so a failed
        reset does not leave it dangling.
    """
    # Imported inside the function (as in the original) so South is only
    # needed when the reset is actually requested.
    from south.db import db

    if db.backend_name != "postgres":
        return

    db.start_transaction()
    try:
        db.execute_many(PG_SEQUENCE_RESETS)
    except Exception:
        # Close the transaction we opened, then let the caller see the error.
        db.rollback_transaction()
        raise
    db.commit_transaction()
\r
409 def sximport(dump, options):
\r
410 uidmap, merged_users = userimport(dump, options)
\r
411 tagmap = tagsimport(dump, uidmap)
\r
412 posts = postimport(dump, uidmap, tagmap)
\r
413 posts, comments = comment_import(dump, uidmap, posts)
\r
414 save_posts(posts, tagmap)
\r
415 post_vote_import(dump, uidmap, posts)
\r
416 comment_vote_import(dump, uidmap, comments, posts)
\r
417 for post in posts.values():
\r
419 badges_import(dump, uidmap)
\r
421 from south.db import db
\r
422 db.commit_transaction()
\r
428 PG_SEQUENCE_RESETS = """
\r
429 SELECT setval('"auth_user_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user";
\r
430 SELECT setval('"auth_user_groups_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_groups";
\r
431 SELECT setval('"auth_user_user_permissions_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_user_permissions";
\r
432 SELECT setval('"activity_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "activity";
\r
433 SELECT setval('"forum_subscriptionsettings_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_subscriptionsettings";
\r
434 SELECT setval('"forum_validationhash_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_validationhash";
\r
435 SELECT setval('"forum_authkeyuserassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_authkeyuserassociation";
\r
436 SELECT setval('"tag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "tag";
\r
437 SELECT setval('"forum_markedtag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_markedtag";
\r
438 SELECT setval('"forum_node_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node";
\r
439 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
440 SELECT setval('"forum_noderevision_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_noderevision";
\r
441 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
442 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
443 SELECT setval('"favorite_question_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "favorite_question";
\r
444 SELECT setval('"forum_questionsubscription_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_questionsubscription";
\r
445 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
446 SELECT setval('"vote_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "vote";
\r
447 SELECT setval('"flagged_item_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "flagged_item";
\r
448 SELECT setval('"badge_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "badge";
\r
449 SELECT setval('"award_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "award";
\r
450 SELECT setval('"repute_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "repute";
\r
451 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
452 SELECT setval('"forum_keyvalue_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_keyvalue";
\r
453 SELECT setval('"forum_openidnonce_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidnonce";
\r
454 SELECT setval('"forum_openidassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidassociation";
\r