1 # -*- coding: utf-8 -*-
\r
3 from xml.dom import minidom
\r
4 from datetime import datetime, timedelta
\r
7 from django.utils.translation import ugettext as _
\r
8 from django.template.defaultfilters import slugify
\r
9 from forum.models.utils import dbsafe_encode
\r
14 for node in el.childNodes:
\r
15 if node.nodeType == node.TEXT_NODE:
\r
19 msstrip = re.compile(r'^(.*)\.\d+')
\r
21 noms = msstrip.match(ts)
\r
25 return datetime(*time.strptime(ts, '%Y-%m-%dT%H:%M:%S')[0:6])
\r
28 return dict([(n.tagName.lower(), getText(n)) for n in el.childNodes if n.nodeType == el.ELEMENT_NODE])
\r
def readTable(dump, name):
    """Load one table of the dump as a list of converted ``<row>`` elements.

    ``dump.read`` is called with ``"<name>.xml"`` and its result is parsed
    with ``minidom.parseString``.  Every element tagged ``row`` in the
    resulting document is passed through ``readEl``; the converted rows are
    returned in document order.
    """
    document = minidom.parseString(dump.read("%s.xml" % name))
    row_elements = document.getElementsByTagName('row')
    return [readEl(row) for row in row_elements]
\r
# Prefix patterns for provider-generated OpenID URLs.  final_username_attempt
# maps URLs matching these to UnknownGoogleUser / UnknownYahooUser instead of
# trying to extract a user name from them.  The dots in the host names are
# escaped so that only the literal hosts match (an unescaped "." would accept
# any character, e.g. "wwwXgoogle.com").
google_accounts_lookup = re.compile(r'^https?://www\.google\.com/accounts/')
yahoo_accounts_lookup = re.compile(r'^https?://me\.yahoo\.com/a/')
\r
37 re.compile(r'^https?://www.google.com/profiles/(?P<uname>\w+(\.\w+)*)/?$'),
\r
38 re.compile(r'^https?://me.yahoo.com/(?P<uname>\w+(\.\w+)*)/?$'),
\r
39 re.compile(r'^https?://openid.aol.com/(?P<uname>\w+(\.\w+)*)/?$'),
\r
40 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).myopenid.com/?$'),
\r
41 re.compile(r'^https?://flickr.com/(\w+/)*(?P<uname>\w+(\.\w+)*)/?$'),
\r
42 re.compile(r'^https?://technorati.com/people/technorati/(?P<uname>\w+(\.\w+)*)/?$'),
\r
43 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).wordpress.com/?$'),
\r
44 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).blogspot.com/?$'),
\r
45 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).livejournal.com/?$'),
\r
46 re.compile(r'^https?://claimid.com/(?P<uname>\w+(\.\w+)*)/?$'),
\r
47 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).pip.verisignlabs.com/?$'),
\r
48 re.compile(r'^https?://getopenid.com/(?P<uname>\w+(\.\w+)*)/?$'),
\r
49 re.compile(r'^https?://[\w\.]+/(\w+/)*(?P<uname>\w+(\.\w+)*)/?$'),
\r
50 re.compile(r'^https?://(?P<uname>[\w\.]+)/?$'),
\r
53 def final_username_attempt(sxu):
\r
54 openid = sxu.get('openid', None)
\r
57 if google_accounts_lookup.search(openid):
\r
58 return UnknownGoogleUser(sxu.get('id'))
\r
59 if yahoo_accounts_lookup.search(openid):
\r
60 return UnknownYahooUser(sxu.get('id'))
\r
62 for lookup in openid_lookups:
\r
63 if lookup.search(openid):
\r
64 return lookup.search(openid).group('uname')
\r
66 return UnknownUser(sxu.get('id'))
\r
68 class UnknownUser(object):
\r
69 def __init__(self, id):
\r
73 return _("user-%(id)s") % {'id': self._id}
\r
75 def __unicode__(self):
\r
76 return self.__str__()
\r
78 def encode(self, *args):
\r
79 return self.__str__()
\r
81 class UnknownGoogleUser(UnknownUser):
\r
83 return _("user-%(id)s (google)") % {'id': self._id}
\r
85 class UnknownYahooUser(UnknownUser):
\r
87 return _("user-%(id)s (yahoo)") % {'id': self._id}
\r
90 class IdMapper(dict):
\r
91 def __getitem__(self, key):
\r
93 return super(IdMapper, self).get(key, 1)
\r
95 def __setitem__(self, key, value):
\r
96 super(IdMapper, self).__setitem__(int(key), int(value))
\r
# Matches strings that start with an http:// or https:// scheme; userimport
# uses it to decide whether an "openid" value is stored as an OpenID URL key.
# Written as a raw string without the needless "\:" and "\/" escapes, which
# are invalid string escape sequences; the compiled pattern matches the same
# inputs as before.
openidre = re.compile(r'^https?://')
\r
99 def userimport(dump, options):
\r
100 users = readTable(dump, "Users")
\r
103 uidmapper = IdMapper()
\r
106 owneruid = options.get('owneruid', None)
\r
107 #check for empty values
\r
114 if sxu.get('id') == '-1':
\r
117 if int(sxu.get('id')) == int(owneruid):
\r
118 osqau = orm.User.objects.get(id=1)
\r
119 uidmapper[owneruid] = 1
\r
123 username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', final_username_attempt(sxu))))
\r
125 if not isinstance(username, UnknownUser) and username in user_by_name:
\r
126 #if options.get('mergesimilar', False) and sxu.get('email', 'INVALID') == user_by_name[username].email:
\r
127 # osqau = user_by_name[username]
\r
129 # uidmapper[sxu.get('id')] = osqau.id
\r
132 while ("%s %d" % (username, inc)) in user_by_name:
\r
135 username = "%s %d" % (username, inc)
\r
137 sxbadges = sxu.get('badgesummary', None)
\r
138 badges = {'1':'0','2':'0','3':'0'}
\r
141 badges.update(dict([b.split('=') for b in sxbadges.split()]))
\r
145 id = sxu.get('id'),
\r
146 username = unicode(username),
\r
148 email = sxu.get('email', ''),
\r
149 is_superuser = sxu.get('usertypeid') == '5',
\r
150 is_staff = sxu.get('usertypeid') == '4',
\r
152 date_joined = readTime(sxu.get('creationdate')),
\r
153 last_seen = readTime(sxu.get('lastaccessdate')),
\r
154 about = sxu.get('aboutme', ''),
\r
155 date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None,
\r
156 email_isvalid = int(sxu.get('usertypeid')) > 2,
\r
157 website = sxu.get('websiteurl', ''),
\r
158 reputation = int(sxu.get('reputation')),
\r
159 gold = int(badges['1']),
\r
160 silver = int(badges['2']),
\r
161 bronze = int(badges['3']),
\r
162 real_name = sxu.get('realname', ''),
\r
167 user_joins = orm.Action(
\r
168 action_type = "userjoins",
\r
169 action_date = osqau.date_joined,
\r
174 rep = orm.ActionRepute(
\r
177 date = osqau.date_joined,
\r
178 action = user_joins
\r
183 orm.SubscriptionSettings.objects.get(user=osqau)
\r
185 s = orm.SubscriptionSettings(user=osqau)
\r
188 uidmapper[osqau.id] = osqau.id
\r
190 new_about = sxu.get('aboutme', None)
\r
191 if new_about and osqau.about != new_about:
\r
193 osqau.about = "%s\n|\n%s" % (osqau.about, new_about)
\r
195 osqau.about = new_about
\r
197 osqau.username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', final_username_attempt(sxu))))
\r
198 osqau.email = sxu.get('email', '')
\r
199 osqau.reputation += int(sxu.get('reputation'))
\r
200 osqau.gold += int(badges['1'])
\r
201 osqau.silver += int(badges['2'])
\r
202 osqau.bronze += int(badges['3'])
\r
204 merged_users.append(osqau.id)
\r
207 user_by_name[osqau.username] = osqau
\r
209 openid = sxu.get('openid', None)
\r
210 if openid and openidre.match(openid):
\r
211 assoc = orm.AuthKeyUserAssociation(user=osqau, key=openid, provider="openidurl")
\r
214 if uidmapper[-1] == -1:
\r
217 return (uidmapper, merged_users)
\r
219 def tagsimport(dump, uidmap):
\r
220 tags = readTable(dump, "Tags")
\r
226 id = int(sxtag['id']),
\r
227 name = sxtag['name'],
\r
228 used_count = int(sxtag['count']),
\r
229 created_by_id = uidmap[sxtag.get('userid', 1)],
\r
233 tagmap[otag.name] = otag
\r
237 def postimport(dump, uidmap, tagmap):
\r
242 for h in readTable(dump, "PostHistory"):
\r
243 if not history.get(h.get('postid'), None):
\r
244 history[h.get('postid')] = []
\r
246 history[h.get('postid')].append(h)
\r
248 posts = readTable(dump, "Posts")
\r
250 for sxpost in posts:
\r
251 nodetype = (sxpost.get('posttypeid') == '1') and "nodetype" or "answer"
\r
254 node_type = nodetype,
\r
256 added_at = readTime(sxpost['creationdate']),
\r
257 body = sxpost['body'],
\r
258 score = sxpost.get('score', 0),
\r
259 author_id = sxpost.get('deletiondate', None) and 1 or uidmap[sxpost['owneruserid']]
\r
264 create_action = orm.Action(
\r
265 action_type = (nodetype == "nodetype") and "ask" or "answer",
\r
266 user_id = post.author_id,
\r
268 action_date = post.added_at
\r
271 create_action.save()
\r
273 #if sxpost.get('deletiondate', None):
\r
274 # delete_action = orm.Action(
\r
275 # action_type = "delete",
\r
278 # action_date = readTime(sxpost['deletiondate'])
\r
281 # delete_action.save()
\r
282 # post.deleted = delete_action
\r
284 if sxpost.get('lasteditoruserid', None):
\r
285 revise_action = orm.Action(
\r
286 action_type = "revise",
\r
287 user_id = uidmap[sxpost.get('lasteditoruserid')],
\r
289 action_date = readTime(sxpost['lasteditdate']),
\r
292 revise_action.save()
\r
293 post.last_edited = revise_action
\r
295 if sxpost.get('communityowneddate', None):
\r
298 wikify_action = orm.Action(
\r
299 action_type = "wikify",
\r
302 action_date = readTime(sxpost['communityowneddate'])
\r
305 wikify_action.save()
\r
308 if sxpost.get('lastactivityuserid', None):
\r
309 post.last_activity_by_id = uidmap[sxpost['lastactivityuserid']]
\r
310 post.last_activity_at = readTime(sxpost['lastactivitydate'])
\r
313 if sxpost.get('posttypeid') == '1': #question
\r
314 post.node_type = "question"
\r
315 post.title = sxpost['title']
\r
317 tagnames = sxpost['tags'].replace(u'ö', '-').replace(u'é', '').replace(u'à ', '')
\r
318 post.tagnames = tagnames
\r
320 post.extra_count = sxpost.get('viewcount', 0)
\r
322 #if sxpost.get('closeddate', None):
\r
323 # post.marked = True
\r
325 # close_action = orm.Action(
\r
326 # action_type = "close",
\r
329 # action_date = datetime.now() - timedelta(days=7)
\r
332 # close_action.save()
\r
333 # post.extra_action = close_action
\r
335 #if sxpost.get('acceptedanswerid', None):
\r
336 # accepted[int(sxpost.get('acceptedanswerid'))] = post
\r
341 post.parent_id = sxpost['parentid']
\r
343 #if int(post.id) in accepted:
\r
344 #post.marked = True
\r
346 #accept_action = orm.Action(
\r
347 # action_type = "acceptanswer",
\r
348 # user_id = accepted[int(post.id)].author_id,
\r
350 # action_date = datetime.now() - timedelta(days=7)
\r
353 #accept_action.save()
\r
356 #post.accepted_at = datetime.now()
\r
357 #post.accepted_by_id = accepted[int(post.id)].author_id
\r
359 #accepted[int(post.id)].extra_ref = post
\r
360 #accepted[int(post.id)].save()
\r
364 all[int(post.id)] = post
\r
368 def comment_import(dump, uidmap, posts):
\r
369 comments = readTable(dump, "PostComments")
\r
370 currid = max(posts.keys())
\r
373 for sxc in comments:
\r
377 node_type = "comment",
\r
378 added_at = readTime(sxc['creationdate']),
\r
379 author_id = uidmap[sxc.get('userid', 1)],
\r
380 body = sxc['text'],
\r
381 parent_id = sxc.get('postid'),
\r
384 if sxc.get('deletiondate', None):
\r
385 delete_action = orm.Action(
\r
386 action_type = "delete",
\r
387 user_id = uidmap[sxc['deletionuserid']],
\r
388 action_date = readTime(sxc['deletiondate'])
\r
391 oc.author_id = uidmap[sxc['deletionuserid']]
\r
394 delete_action.node = oc
\r
395 delete_action.save()
\r
397 oc.deleted = delete_action
\r
399 oc.author_id = uidmap[sxc.get('userid', 1)]
\r
402 create_action = orm.Action(
\r
403 action_type = "comment",
\r
404 user_id = oc.author_id,
\r
406 action_date = oc.added_at
\r
409 create_action.save()
\r
413 mapping[int(sxc['id'])] = int(oc.id)
\r
415 return posts, mapping
\r
418 def add_tags_to_posts(posts, tagmap):
\r
419 for post in posts.values():
\r
420 if post.node_type == "question":
\r
421 tags = [tag for tag in [tagmap.get(name.strip()) for name in post.tagnames.split(u' ') if name] if tag]
\r
422 post.tagnames = " ".join([t.name for t in tags]).strip()
\r
425 create_and_activate_revision(post)
\r
428 def create_and_activate_revision(post):
\r
429 rev = orm.NodeRevision(
\r
430 author_id = post.author_id,
\r
433 revised_at = post.added_at,
\r
435 summary = 'Initial revision',
\r
436 tagnames = post.tagnames,
\r
437 title = post.title,
\r
441 post.active_revision_id = rev.id
\r
444 def post_vote_import(dump, uidmap, posts):
\r
445 votes = readTable(dump, "Posts2Votes")
\r
446 close_reasons = dict([(r['id'], r['name']) for r in readTable(dump, "CloseReasons")])
\r
451 action = orm.Action(
\r
452 user_id=uidmap[sxv['userid']],
\r
453 action_date = readTime(sxv['creationdate']),
\r
456 node = posts.get(int(sxv['postid']), None)
\r
457 if not node: continue
\r
460 if sxv['votetypeid'] == '1':
\r
462 question = posts.get(int(answer.parent_id), None)
\r
464 action.action_type = "acceptanswer"
\r
467 answer.marked = True
\r
468 answer.extra_action = action
\r
470 question.extra_ref_id = answer.id
\r
475 elif sxv['votetypeid'] in ('2', '3'):
\r
476 if not (action.node.id, action.user_id) in user2vote:
\r
477 user2vote.append((action.node.id, action.user_id))
\r
479 action.action_type = (sxv['votetypeid'] == '2') and "voteup" or "votedown"
\r
483 node_id = action.node.id,
\r
484 user_id = action.user_id,
\r
485 voted_at = action.action_date,
\r
486 value = sxv['votetypeid'] == '2' and 1 or -1,
\r
491 action.action_type = "unknown"
\r
494 elif sxv['votetypeid'] in ('4', '12', '13'):
\r
495 action.action_type = "flag"
\r
499 node = action.node,
\r
500 user_id = action.user_id,
\r
501 flagged_at = action.action_date,
\r
508 elif sxv['votetypeid'] == '5':
\r
509 action.action_type = "favorite"
\r
512 elif sxv['votetypeid'] == '6':
\r
513 action.action_type = "close"
\r
514 action.extra = dbsafe_encode(close_reasons[sxv['comment']])
\r
518 node.extra_action = action
\r
521 elif sxv['votetypeid'] == '7':
\r
522 action.action_type = "unknown"
\r
525 node.marked = False
\r
526 node.extra_action = None
\r
529 elif sxv['votetypeid'] == '10':
\r
530 action.action_type = "delete"
\r
533 node.deleted = action
\r
536 elif sxv['votetypeid'] == '11':
\r
537 action.action_type = "unknown"
\r
540 node.deleted = None
\r
544 action.action_type = "unknown"
\r
548 if sxv.get('targetrepchange', None):
\r
549 rep = orm.ActionRepute(
\r
551 date = action.action_date,
\r
552 user_id = uidmap[sxv['targetuserid']],
\r
553 value = int(sxv['targetrepchange'])
\r
558 if sxv.get('voterrepchange', None):
\r
559 rep = orm.ActionRepute(
\r
561 date = action.action_date,
\r
562 user_id = uidmap[sxv['userid']],
\r
563 value = int(sxv['voterrepchange'])
\r
569 def comment_vote_import(dump, uidmap, comments, posts):
\r
570 votes = readTable(dump, "Comments2Votes")
\r
574 if sxv['votetypeid'] == "2":
\r
575 comment_id = comments[int(sxv['postcommentid'])]
\r
576 user_id = uidmap[sxv['userid']]
\r
578 if not (comment_id, user_id) in user2vote:
\r
579 user2vote.append((comment_id, user_id))
\r
581 action = orm.Action(
\r
582 action_type = "voteupcomment",
\r
584 action_date = readTime(sxv['creationdate']),
\r
585 node_id = comment_id
\r
590 node_id = comment_id,
\r
592 voted_at = action.action_date,
\r
599 posts[int(action.node_id)].score += 1
\r
600 posts[int(action.node_id)].save()
\r
604 def badges_import(dump, uidmap, post_list):
\r
605 node_ctype = orm['contenttypes.contenttype'].objects.get(name='node')
\r
606 obadges = dict([(b.cls, b) for b in orm.Badge.objects.all()])
\r
607 sxbadges = dict([(int(b['id']), b) for b in readTable(dump, "Badges")])
\r
608 user_badge_count = {}
\r
612 for id, sxb in sxbadges.items():
\r
613 cls = "".join(sxb['name'].replace('&', 'And').split(' '))
\r
616 sx_to_osqa[id] = obadges[cls]
\r
621 type = sxb['class']
\r
624 sx_to_osqa[id] = osqab
\r
626 sxawards = readTable(dump, "Users2Badges")
\r
629 for sxa in sxawards:
\r
630 badge = sx_to_osqa[int(sxa['badgeid'])]
\r
632 user_id = uidmap[sxa['userid']]
\r
633 if not user_badge_count.get(user_id, None):
\r
634 user_badge_count[user_id] = 0
\r
636 action = orm.Action(
\r
637 action_type = "award",
\r
639 action_date = readTime(sxa['date'])
\r
645 user_id = uidmap[sxa['userid']],
\r
647 node = post_list[user_badge_count[user_id]],
\r
648 awarded_at = action.action_date,
\r
653 badge.awarded_count += 1
\r
654 user_badge_count[user_id] += 1
\r
656 for badge in obadges.values():
\r
def reset_sequences():
    """Run the PG_SEQUENCE_RESETS statements when the backend is PostgreSQL.

    The South ``db`` object is imported locally.  When its ``backend_name``
    is anything other than ``"postgres"`` the function returns without
    touching the database.  Otherwise the statements in the module-level
    ``PG_SEQUENCE_RESETS`` string are executed between
    ``start_transaction()`` and ``commit_transaction()``.
    """
    from south.db import db

    # Only the postgres backend gets the sequence resets.
    if db.backend_name != "postgres":
        return

    db.start_transaction()
    db.execute_many(PG_SEQUENCE_RESETS)
    db.commit_transaction()
\r
667 def sximport(dump, options):
\r
668 uidmap, merged_users = userimport(dump, options)
\r
669 tagmap = tagsimport(dump, uidmap)
\r
670 posts = postimport(dump, uidmap, tagmap)
\r
671 posts, comments = comment_import(dump, uidmap, posts)
\r
672 add_tags_to_posts(posts, tagmap)
\r
673 post_vote_import(dump, uidmap, posts)
\r
674 comment_vote_import(dump, uidmap, comments, posts)
\r
675 badges_import(dump, uidmap, posts.values())
\r
677 from south.db import db
\r
678 db.commit_transaction()
\r
684 PG_SEQUENCE_RESETS = """
\r
685 SELECT setval('"auth_user_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user";
\r
686 SELECT setval('"auth_user_groups_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_groups";
\r
687 SELECT setval('"auth_user_user_permissions_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_user_permissions";
\r
688 SELECT setval('"forum_keyvalue_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_keyvalue";
\r
689 SELECT setval('"forum_action_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_action";
\r
690 SELECT setval('"forum_actionrepute_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_actionrepute";
\r
691 SELECT setval('"forum_subscriptionsettings_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_subscriptionsettings";
\r
692 SELECT setval('"forum_validationhash_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_validationhash";
\r
693 SELECT setval('"forum_authkeyuserassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_authkeyuserassociation";
\r
694 SELECT setval('"forum_tag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_tag";
\r
695 SELECT setval('"forum_markedtag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_markedtag";
\r
696 SELECT setval('"forum_node_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node";
\r
697 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
698 SELECT setval('"forum_noderevision_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_noderevision";
\r
699 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
700 SELECT setval('"forum_questionsubscription_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_questionsubscription";
\r
701 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
702 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
703 SELECT setval('"forum_vote_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_vote";
\r
704 SELECT setval('"forum_flag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_flag";
\r
705 SELECT setval('"forum_badge_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_badge";
\r
706 SELECT setval('"forum_award_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_award";
\r
707 SELECT setval('"forum_openidnonce_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidnonce";
\r
708 SELECT setval('"forum_openidassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidassociation";
\r