1 # -*- coding: utf-8 -*-
3 from xml.dom import minidom
4 from datetime import datetime, timedelta
7 from django.utils.translation import ugettext as _
8 from django.template.defaultfilters import slugify
9 from forum.models.utils import dbsafe_encode
14 for node in el.childNodes:
15 if node.nodeType == node.TEXT_NODE:
# Matches a string that contains a '.' followed by one or more digits;
# group 1 captures everything before the last such '.'. It is applied to
# timestamp strings with msstrip.match(ts).
# NOTE(review): presumably used to strip fractional seconds before parsing
# with '%Y-%m-%dT%H:%M:%S' -- confirm against the full readTime body.
msstrip = re.compile(r'^(.*)\.\d+')
21 noms = msstrip.match(ts)
25 return datetime(*time.strptime(ts, '%Y-%m-%dT%H:%M:%S')[0:6])
28 return dict([(n.tagName.lower(), getText(n)) for n in el.childNodes if n.nodeType == el.ELEMENT_NODE])
def readTable(dump, name):
    """Load one table of the dump as a list of row records.

    The member named ``<name>.xml`` is fetched with ``dump.read``, parsed as
    XML, and every ``row`` element found in the document is converted with
    ``readEl``. The converted rows are returned in document order.
    """
    xml_source = dump.read("%s.xml" % name)
    document = minidom.parseString(xml_source)
    row_elements = document.getElementsByTagName('row')
    return [readEl(row) for row in row_elements]
# Prefix patterns for OpenID URLs issued by Google and Yahoo account
# endpoints. The dots in the host names are escaped so that they match a
# literal '.' only; an unescaped '.' would also accept look-alike hosts such
# as "wwwXgoogle.com".
google_accounts_lookup = re.compile(r'^https?://www\.google\.com/accounts/')
yahoo_accounts_lookup = re.compile(r'^https?://me\.yahoo\.com/a/')
37 re.compile(r'^https?://www.google.com/profiles/(?P<uname>\w+(\.\w+)*)/?$'),
38 re.compile(r'^https?://me.yahoo.com/(?P<uname>\w+(\.\w+)*)/?$'),
39 re.compile(r'^https?://openid.aol.com/(?P<uname>\w+(\.\w+)*)/?$'),
40 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).myopenid.com/?$'),
41 re.compile(r'^https?://flickr.com/(\w+/)*(?P<uname>\w+(\.\w+)*)/?$'),
42 re.compile(r'^https?://technorati.com/people/technorati/(?P<uname>\w+(\.\w+)*)/?$'),
43 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).wordpress.com/?$'),
44 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).blogspot.com/?$'),
45 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).livejournal.com/?$'),
46 re.compile(r'^https?://claimid.com/(?P<uname>\w+(\.\w+)*)/?$'),
47 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).pip.verisignlabs.com/?$'),
48 re.compile(r'^https?://getopenid.com/(?P<uname>\w+(\.\w+)*)/?$'),
49 re.compile(r'^https?://[\w\.]+/(\w+/)*(?P<uname>\w+(\.\w+)*)/?$'),
50 re.compile(r'^https?://(?P<uname>[\w\.]+)/?$'),
53 def final_username_attempt(sxu):
54 openid = sxu.get('openid', None)
57 if google_accounts_lookup.search(openid):
58 return UnknownGoogleUser(sxu.get('id'))
59 if yahoo_accounts_lookup.search(openid):
60 return UnknownYahooUser(sxu.get('id'))
62 for lookup in openid_lookups:
63 if lookup.search(openid):
64 return lookup.search(openid).group('uname')
66 return UnknownUser(sxu.get('id'))
68 class UnknownUser(object):
69 def __init__(self, id):
73 return _("user-%(id)s") % {'id': self._id}
75 def __unicode__(self):
76 return self.__str__()
78 def encode(self, *args):
79 return self.__str__()
81 class UnknownGoogleUser(UnknownUser):
83 return _("user-%(id)s (google)") % {'id': self._id}
85 class UnknownYahooUser(UnknownUser):
87 return _("user-%(id)s (yahoo)") % {'id': self._id}
90 class IdMapper(dict):
91 def __getitem__(self, key):
93 return super(IdMapper, self).get(key, 1)
95 def __setitem__(self, key, value):
96 super(IdMapper, self).__setitem__(int(key), int(value))
# Matches values that begin with an "http://" or "https://" scheme; it is
# applied to the 'openid' field with openidre.match(openid).
# Written as a raw string without the needless "\:" and "\/" escapes, which
# are invalid string escape sequences in a non-raw literal.
openidre = re.compile(r'^https?://')
99 def userimport(dump, options):
100 users = readTable(dump, "Users")
103 uidmapper = IdMapper()
106 owneruid = options.get('owneruid', None)
107 #check for empty values
114 if sxu.get('id') == '-1':
117 if int(sxu.get('id')) == int(owneruid):
118 osqau = orm.User.objects.get(id=1)
119 uidmapper[owneruid] = 1
123 username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', final_username_attempt(sxu))))
125 if not isinstance(username, UnknownUser) and username in user_by_name:
126 #if options.get('mergesimilar', False) and sxu.get('email', 'INVALID') == user_by_name[username].email:
127 # osqau = user_by_name[username]
129 # uidmapper[sxu.get('id')] = osqau.id
132 while ("%s %d" % (username, inc)) in user_by_name:
135 username = "%s %d" % (username, inc)
137 sxbadges = sxu.get('badgesummary', None)
138 badges = {'1':'0','2':'0','3':'0'}
141 badges.update(dict([b.split('=') for b in sxbadges.split()]))
145 id = sxu.get('id'),
146 username = unicode(username),
148 email = sxu.get('email', ''),
149 is_superuser = sxu.get('usertypeid') == '5',
150 is_staff = sxu.get('usertypeid') == '4',
152 date_joined = readTime(sxu.get('creationdate')),
153 last_seen = readTime(sxu.get('lastaccessdate')),
154 about = sxu.get('aboutme', ''),
155 date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None,
156 email_isvalid = int(sxu.get('usertypeid')) > 2,
157 website = sxu.get('websiteurl', ''),
158 reputation = int(sxu.get('reputation')),
159 gold = int(badges['1']),
160 silver = int(badges['2']),
161 bronze = int(badges['3']),
162 real_name = sxu.get('realname', ''),
163 location = sxu.get('location', ''),
168 user_joins = orm.Action(
169 action_type = "userjoins",
170 action_date = osqau.date_joined,
175 rep = orm.ActionRepute(
178 date = osqau.date_joined,
179 action = user_joins
184 orm.SubscriptionSettings.objects.get(user=osqau)
186 s = orm.SubscriptionSettings(user=osqau)
189 uidmapper[osqau.id] = osqau.id
191 new_about = sxu.get('aboutme', None)
192 if new_about and osqau.about != new_about:
194 osqau.about = "%s\n|\n%s" % (osqau.about, new_about)
196 osqau.about = new_about
198 osqau.username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', final_username_attempt(sxu))))
199 osqau.email = sxu.get('email', '')
200 osqau.reputation += int(sxu.get('reputation'))
201 osqau.gold += int(badges['1'])
202 osqau.silver += int(badges['2'])
203 osqau.bronze += int(badges['3'])
205 osqau.date_joined = readTime(sxu.get('creationdate'))
206 osqau.website = sxu.get('websiteurl', '')
207 osqau.date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None
208 osqau.location = sxu.get('location', '')
209 osqau.real_name = sxu.get('realname', '')
211 merged_users.append(osqau.id)
214 user_by_name[osqau.username] = osqau
216 openid = sxu.get('openid', None)
217 if openid and openidre.match(openid):
218 assoc = orm.AuthKeyUserAssociation(user=osqau, key=openid, provider="openidurl")
221 if uidmapper[-1] == -1:
224 return (uidmapper, merged_users)
226 def tagsimport(dump, uidmap):
227 tags = readTable(dump, "Tags")
233 id = int(sxtag['id']),
234 name = sxtag['name'],
235 used_count = int(sxtag['count']),
236 created_by_id = uidmap[sxtag.get('userid', 1)],
240 tagmap[otag.name] = otag
244 def postimport(dump, uidmap, tagmap):
249 for h in readTable(dump, "PostHistory"):
250 if not history.get(h.get('postid'), None):
251 history[h.get('postid')] = []
253 history[h.get('postid')].append(h)
255 posts = readTable(dump, "Posts")
257 for sxpost in posts:
258 nodetype = (sxpost.get('posttypeid') == '1') and "nodetype" or "answer"
261 node_type = nodetype,
263 added_at = readTime(sxpost['creationdate']),
264 body = sxpost['body'],
265 score = sxpost.get('score', 0),
266 author_id = sxpost.get('deletiondate', None) and 1 or uidmap[sxpost['owneruserid']]
271 create_action = orm.Action(
272 action_type = (nodetype == "nodetype") and "ask" or "answer",
273 user_id = post.author_id,
275 action_date = post.added_at
278 create_action.save()
280 #if sxpost.get('deletiondate', None):
281 # delete_action = orm.Action(
282 # action_type = "delete",
285 # action_date = readTime(sxpost['deletiondate'])
288 # delete_action.save()
289 # post.deleted = delete_action
291 if sxpost.get('lasteditoruserid', None):
292 revise_action = orm.Action(
293 action_type = "revise",
294 user_id = uidmap[sxpost.get('lasteditoruserid')],
296 action_date = readTime(sxpost['lasteditdate']),
299 revise_action.save()
300 post.last_edited = revise_action
302 if sxpost.get('communityowneddate', None):
305 wikify_action = orm.Action(
306 action_type = "wikify",
309 action_date = readTime(sxpost['communityowneddate'])
312 wikify_action.save()
315 if sxpost.get('lastactivityuserid', None):
316 post.last_activity_by_id = uidmap[sxpost['lastactivityuserid']]
317 post.last_activity_at = readTime(sxpost['lastactivitydate'])
320 if sxpost.get('posttypeid') == '1': #question
321 post.node_type = "question"
322 post.title = sxpost['title']
324 tagnames = sxpost['tags'].replace(u'ö', '-').replace(u'é', '').replace(u'à ', '')
325 post.tagnames = tagnames
327 post.extra_count = sxpost.get('viewcount', 0)
329 #if sxpost.get('closeddate', None):
330 # post.marked = True
332 # close_action = orm.Action(
333 # action_type = "close",
336 # action_date = datetime.now() - timedelta(days=7)
339 # close_action.save()
340 # post.extra_action = close_action
342 #if sxpost.get('acceptedanswerid', None):
343 # accepted[int(sxpost.get('acceptedanswerid'))] = post
348 post.parent_id = sxpost['parentid']
350 #if int(post.id) in accepted:
351 #post.marked = True
353 #accept_action = orm.Action(
354 # action_type = "acceptanswer",
355 # user_id = accepted[int(post.id)].author_id,
357 # action_date = datetime.now() - timedelta(days=7)
360 #accept_action.save()
363 #post.accepted_at = datetime.now()
364 #post.accepted_by_id = accepted[int(post.id)].author_id
366 #accepted[int(post.id)].extra_ref = post
367 #accepted[int(post.id)].save()
371 all[int(post.id)] = post
375 def comment_import(dump, uidmap, posts):
376 comments = readTable(dump, "PostComments")
377 currid = max(posts.keys())
380 for sxc in comments:
384 node_type = "comment",
385 added_at = readTime(sxc['creationdate']),
386 author_id = uidmap[sxc.get('userid', 1)],
387 body = sxc['text'],
388 parent_id = sxc.get('postid'),
391 if sxc.get('deletiondate', None):
392 delete_action = orm.Action(
393 action_type = "delete",
394 user_id = uidmap[sxc['deletionuserid']],
395 action_date = readTime(sxc['deletiondate'])
398 oc.author_id = uidmap[sxc['deletionuserid']]
401 delete_action.node = oc
402 delete_action.save()
404 oc.deleted = delete_action
406 oc.author_id = uidmap[sxc.get('userid', 1)]
409 create_action = orm.Action(
410 action_type = "comment",
411 user_id = oc.author_id,
413 action_date = oc.added_at
416 create_action.save()
420 mapping[int(sxc['id'])] = int(oc.id)
422 return posts, mapping
425 def add_tags_to_posts(posts, tagmap):
426 for post in posts.values():
427 if post.node_type == "question":
428 tags = [tag for tag in [tagmap.get(name.strip()) for name in post.tagnames.split(u' ') if name] if tag]
429 post.tagnames = " ".join([t.name for t in tags]).strip()
432 create_and_activate_revision(post)
435 def create_and_activate_revision(post):
436 rev = orm.NodeRevision(
437 author_id = post.author_id,
440 revised_at = post.added_at,
442 summary = 'Initial revision',
443 tagnames = post.tagnames,
444 title = post.title,
448 post.active_revision_id = rev.id
451 def post_vote_import(dump, uidmap, posts):
452 votes = readTable(dump, "Posts2Votes")
453 close_reasons = dict([(r['id'], r['name']) for r in readTable(dump, "CloseReasons")])
458 action = orm.Action(
459 user_id=uidmap[sxv['userid']],
460 action_date = readTime(sxv['creationdate']),
463 node = posts.get(int(sxv['postid']), None)
464 if not node: continue
467 if sxv['votetypeid'] == '1':
469 question = posts.get(int(answer.parent_id), None)
471 action.action_type = "acceptanswer"
474 answer.marked = True
475 answer.extra_action = action
477 question.extra_ref_id = answer.id
482 elif sxv['votetypeid'] in ('2', '3'):
483 if not (action.node.id, action.user_id) in user2vote:
484 user2vote.append((action.node.id, action.user_id))
486 action.action_type = (sxv['votetypeid'] == '2') and "voteup" or "votedown"
490 node_id = action.node.id,
491 user_id = action.user_id,
492 voted_at = action.action_date,
493 value = sxv['votetypeid'] == '2' and 1 or -1,
498 action.action_type = "unknown"
501 elif sxv['votetypeid'] in ('4', '12', '13'):
502 action.action_type = "flag"
506 node = action.node,
507 user_id = action.user_id,
508 flagged_at = action.action_date,
515 elif sxv['votetypeid'] == '5':
516 action.action_type = "favorite"
519 elif sxv['votetypeid'] == '6':
520 action.action_type = "close"
521 action.extra = dbsafe_encode(close_reasons[sxv['comment']])
525 node.extra_action = action
528 elif sxv['votetypeid'] == '7':
529 action.action_type = "unknown"
532 node.marked = False
533 node.extra_action = None
536 elif sxv['votetypeid'] == '10':
537 action.action_type = "delete"
540 node.deleted = action
543 elif sxv['votetypeid'] == '11':
544 action.action_type = "unknown"
547 node.deleted = None
551 action.action_type = "unknown"
555 if sxv.get('targetrepchange', None):
556 rep = orm.ActionRepute(
558 date = action.action_date,
559 user_id = uidmap[sxv['targetuserid']],
560 value = int(sxv['targetrepchange'])
565 if sxv.get('voterrepchange', None):
566 rep = orm.ActionRepute(
568 date = action.action_date,
569 user_id = uidmap[sxv['userid']],
570 value = int(sxv['voterrepchange'])
576 def comment_vote_import(dump, uidmap, comments, posts):
577 votes = readTable(dump, "Comments2Votes")
581 if sxv['votetypeid'] == "2":
582 comment_id = comments[int(sxv['postcommentid'])]
583 user_id = uidmap[sxv['userid']]
585 if not (comment_id, user_id) in user2vote:
586 user2vote.append((comment_id, user_id))
588 action = orm.Action(
589 action_type = "voteupcomment",
591 action_date = readTime(sxv['creationdate']),
592 node_id = comment_id
597 node_id = comment_id,
599 voted_at = action.action_date,
606 posts[int(action.node_id)].score += 1
607 posts[int(action.node_id)].save()
611 def badges_import(dump, uidmap, post_list):
612 node_ctype = orm['contenttypes.contenttype'].objects.get(name='node')
613 obadges = dict([(b.cls, b) for b in orm.Badge.objects.all()])
614 sxbadges = dict([(int(b['id']), b) for b in readTable(dump, "Badges")])
615 user_badge_count = {}
619 for id, sxb in sxbadges.items():
620 cls = "".join(sxb['name'].replace('&', 'And').split(' '))
623 sx_to_osqa[id] = obadges[cls]
628 type = sxb['class']
631 sx_to_osqa[id] = osqab
633 sxawards = readTable(dump, "Users2Badges")
636 for sxa in sxawards:
637 badge = sx_to_osqa[int(sxa['badgeid'])]
639 user_id = uidmap[sxa['userid']]
640 if not user_badge_count.get(user_id, None):
641 user_badge_count[user_id] = 0
643 action = orm.Action(
644 action_type = "award",
646 action_date = readTime(sxa['date'])
652 user_id = uidmap[sxa['userid']],
654 node = post_list[user_badge_count[user_id]],
655 awarded_at = action.action_date,
660 badge.awarded_count += 1
661 user_badge_count[user_id] += 1
663 for badge in obadges.values():
def reset_sequences():
    """Execute the sequence-reset statements when running on PostgreSQL.

    Nothing happens unless South reports ``"postgres"`` as the backend name.
    Otherwise the statements in ``PG_SEQUENCE_RESETS`` are executed between
    ``start_transaction`` and ``commit_transaction``.
    """
    from south.db import db

    # Other backends are left untouched.
    if db.backend_name != "postgres":
        return

    db.start_transaction()
    db.execute_many(PG_SEQUENCE_RESETS)
    db.commit_transaction()
674 def sximport(dump, options):
675 uidmap, merged_users = userimport(dump, options)
676 tagmap = tagsimport(dump, uidmap)
677 posts = postimport(dump, uidmap, tagmap)
678 posts, comments = comment_import(dump, uidmap, posts)
679 add_tags_to_posts(posts, tagmap)
680 post_vote_import(dump, uidmap, posts)
681 comment_vote_import(dump, uidmap, comments, posts)
682 badges_import(dump, uidmap, posts.values())
684 from south.db import db
685 db.commit_transaction()
692 SELECT setval('"auth_user_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user";
693 SELECT setval('"auth_user_groups_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_groups";
694 SELECT setval('"auth_user_user_permissions_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_user_permissions";
695 SELECT setval('"forum_keyvalue_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_keyvalue";
696 SELECT setval('"forum_action_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_action";
697 SELECT setval('"forum_actionrepute_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_actionrepute";
698 SELECT setval('"forum_subscriptionsettings_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_subscriptionsettings";
699 SELECT setval('"forum_validationhash_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_validationhash";
700 SELECT setval('"forum_authkeyuserassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_authkeyuserassociation";
701 SELECT setval('"forum_tag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_tag";
702 SELECT setval('"forum_markedtag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_markedtag";
703 SELECT setval('"forum_node_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node";
704 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
705 SELECT setval('"forum_noderevision_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_noderevision";
706 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
707 SELECT setval('"forum_questionsubscription_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_questionsubscription";
708 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
709 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
710 SELECT setval('"forum_vote_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_vote";
711 SELECT setval('"forum_flag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_flag";
712 SELECT setval('"forum_badge_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_badge";
713 SELECT setval('"forum_award_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_award";
714 SELECT setval('"forum_openidnonce_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidnonce";
715 SELECT setval('"forum_openidassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidassociation";