1 # -*- coding: utf-8 -*-
\r
3 from xml.dom import minidom
\r
4 from datetime import datetime, timedelta
\r
7 from django.utils.translation import ugettext as _
\r
8 from django.template.defaultfilters import slugify
\r
9 from forum.models.utils import dbsafe_encode
\r
14 for node in el.childNodes:
\r
15 if node.nodeType == node.TEXT_NODE:
\r
19 msstrip = re.compile(r'^(.*)\.\d+')
\r
21 noms = msstrip.match(ts)
\r
25 return datetime(*time.strptime(ts, '%Y-%m-%dT%H:%M:%S')[0:6])
\r
28 return dict([(n.tagName.lower(), getText(n)) for n in el.childNodes if n.nodeType == el.ELEMENT_NODE])
\r
def readTable(dump, name):
    """Load one table of the dump as a list of row dictionaries.

    Reads ``<name>.xml`` through ``dump.read()``, parses it with minidom and
    converts every ``row`` element with readEl().
    """
    document = minidom.parseString(dump.read("%s.xml" % name))
    row_elements = document.getElementsByTagName('row')
    return [readEl(row) for row in row_elements]
\r
# Identity URLs that final_username_attempt() maps to an Unknown*User
# placeholder instead of trying to extract a name from them.
# Dots in host names are escaped so that "." only matches a literal dot.
google_accounts_lookup = re.compile(r'^https?://www\.google\.com/accounts/')
yahoo_accounts_lookup = re.compile(r'^https?://me\.yahoo\.com/a/')

# Tried in order by final_username_attempt(); the first pattern that matches
# supplies the user name through its "uname" group. The provider-specific
# patterns come first, the two generic fallbacks last.
openid_lookups = [
    re.compile(r'^https?://www\.google\.com/profiles/(?P<uname>\w+(\.\w+)*)/?$'),
    re.compile(r'^https?://me\.yahoo\.com/(?P<uname>\w+(\.\w+)*)/?$'),
    re.compile(r'^https?://openid\.aol\.com/(?P<uname>\w+(\.\w+)*)/?$'),
    re.compile(r'^https?://(?P<uname>\w+(\.\w+)*)\.myopenid\.com/?$'),
    re.compile(r'^https?://flickr\.com/(\w+/)*(?P<uname>\w+(\.\w+)*)/?$'),
    re.compile(r'^https?://technorati\.com/people/technorati/(?P<uname>\w+(\.\w+)*)/?$'),
    re.compile(r'^https?://(?P<uname>\w+(\.\w+)*)\.wordpress\.com/?$'),
    re.compile(r'^https?://(?P<uname>\w+(\.\w+)*)\.blogspot\.com/?$'),
    re.compile(r'^https?://(?P<uname>\w+(\.\w+)*)\.livejournal\.com/?$'),
    re.compile(r'^https?://claimid\.com/(?P<uname>\w+(\.\w+)*)/?$'),
    re.compile(r'^https?://(?P<uname>\w+(\.\w+)*)\.pip\.verisignlabs\.com/?$'),
    re.compile(r'^https?://getopenid\.com/(?P<uname>\w+(\.\w+)*)/?$'),
    # Generic fallback: last path segment of any URL.
    re.compile(r'^https?://[\w\.]+/(\w+/)*(?P<uname>\w+(\.\w+)*)/?$'),
    # Generic fallback: the bare host name.
    re.compile(r'^https?://(?P<uname>[\w\.]+)/?$'),
]
\r
def final_username_attempt(sxu):
    """Derive a last-resort user name for a user row from its OpenID URL.

    ``sxu`` is the row dictionary of one user. Returns:

    * an UnknownGoogleUser / UnknownYahooUser placeholder when the OpenID
      matches google_accounts_lookup / yahoo_accounts_lookup;
    * the ``uname`` group of the first pattern in openid_lookups that
      matches the OpenID;
    * an UnknownUser placeholder when there is no OpenID or nothing matches.
    """
    openid = sxu.get('openid', None)

    # A row without an OpenID cannot be matched; passing None to
    # Pattern.search() would raise TypeError.
    if openid:
        if google_accounts_lookup.search(openid):
            return UnknownGoogleUser(sxu.get('id'))
        if yahoo_accounts_lookup.search(openid):
            return UnknownYahooUser(sxu.get('id'))

        for lookup in openid_lookups:
            # Search once and reuse the match object.
            found = lookup.search(openid)
            if found:
                return found.group('uname')

    return UnknownUser(sxu.get('id'))
\r
class UnknownUser(object):
    """Placeholder name for a user identified only by an id.

    final_username_attempt() returns an instance when no name can be
    extracted from the user's OpenID. The text form is the translated
    string "user-<id>".
    """

    def __init__(self, id):
        # Kept for the name template in __str__.
        self._id = id

    def __str__(self):
        template = _("user-%(id)s")
        return template % {'id': self._id}

    def __unicode__(self):
        # Same text as __str__; dispatches through self so subclasses that
        # override __str__ are honoured.
        return self.__str__()

    def encode(self, *args):
        # Accepts and ignores any arguments; returns the __str__ text.
        # NOTE(review): presumably mimics the string encode() call signature - confirm.
        return self.__str__()
\r
class UnknownGoogleUser(UnknownUser):
    """UnknownUser variant whose text form carries a "(google)" suffix."""

    def __str__(self):
        template = _("user-%(id)s (google)")
        return template % {'id': self._id}
\r
class UnknownYahooUser(UnknownUser):
    """UnknownUser variant whose text form carries a "(yahoo)" suffix."""

    def __str__(self):
        template = _("user-%(id)s (yahoo)")
        return template % {'id': self._id}
\r
class IdMapper(dict):
    """Dictionary mapping source ids to imported ids, keyed by int.

    Keys and values are coerced with int() on assignment, and keys are
    coerced the same way on lookup, so ``mapper['5']`` and ``mapper[5]``
    address the same entry. Looking up an id that was never stored returns
    1 instead of raising KeyError.
    NOTE(review): 1 is presumably the id of the site owner account - confirm.
    """

    def __getitem__(self, key):
        # __setitem__ stores int keys, while callers index with the raw
        # string values read from the dump; normalise before the lookup.
        return super(IdMapper, self).get(int(key), 1)

    def __setitem__(self, key, value):
        super(IdMapper, self).__setitem__(int(key), int(value))
\r
98 openidre = re.compile('^https?\:\/\/')
\r
99 def userimport(dump, options):
\r
100 users = readTable(dump, "Users")
\r
103 uidmapper = IdMapper()
\r
106 owneruid = options.get('owneruid', None)
\r
107 #check for empty values
\r
114 if sxu.get('id') == '-1':
\r
117 if int(sxu.get('id')) == int(owneruid):
\r
118 osqau = orm.User.objects.get(id=1)
\r
119 uidmapper[owneruid] = 1
\r
123 username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', final_username_attempt(sxu))))
\r
125 if not isinstance(username, UnknownUser) and username in user_by_name:
\r
126 #if options.get('mergesimilar', False) and sxu.get('email', 'INVALID') == user_by_name[username].email:
\r
127 # osqau = user_by_name[username]
\r
129 # uidmapper[sxu.get('id')] = osqau.id
\r
132 while ("%s %d" % (username, inc)) in user_by_name:
\r
135 username = "%s %d" % (username, inc)
\r
137 sxbadges = sxu.get('badgesummary', None)
\r
138 badges = {'1':'0','2':'0','3':'0'}
\r
141 badges.update(dict([b.split('=') for b in sxbadges.split()]))
\r
145 id = sxu.get('id'),
\r
146 username = unicode(username),
\r
148 email = sxu.get('email', ''),
\r
149 is_superuser = sxu.get('usertypeid') == '5',
\r
150 is_staff = sxu.get('usertypeid') == '4',
\r
152 date_joined = readTime(sxu.get('creationdate')),
\r
153 last_seen = readTime(sxu.get('lastaccessdate')),
\r
154 about = sxu.get('aboutme', ''),
\r
155 date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None,
\r
156 email_isvalid = int(sxu.get('usertypeid')) > 2,
\r
157 website = sxu.get('websiteurl', ''),
\r
158 reputation = int(sxu.get('reputation')),
\r
159 gold = int(badges['1']),
\r
160 silver = int(badges['2']),
\r
161 bronze = int(badges['3']),
\r
162 real_name = sxu.get('realname', ''),
\r
163 location = sxu.get('location', ''),
\r
168 user_joins = orm.Action(
\r
169 action_type = "userjoins",
\r
170 action_date = osqau.date_joined,
\r
175 rep = orm.ActionRepute(
\r
178 date = osqau.date_joined,
\r
179 action = user_joins
\r
184 orm.SubscriptionSettings.objects.get(user=osqau)
\r
186 s = orm.SubscriptionSettings(user=osqau)
\r
189 uidmapper[osqau.id] = osqau.id
\r
191 new_about = sxu.get('aboutme', None)
\r
192 if new_about and osqau.about != new_about:
\r
194 osqau.about = "%s\n|\n%s" % (osqau.about, new_about)
\r
196 osqau.about = new_about
\r
198 osqau.username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', final_username_attempt(sxu))))
\r
199 osqau.email = sxu.get('email', '')
\r
200 osqau.reputation += int(sxu.get('reputation'))
\r
201 osqau.gold += int(badges['1'])
\r
202 osqau.silver += int(badges['2'])
\r
203 osqau.bronze += int(badges['3'])
\r
205 osqau.date_joined = readTime(sxu.get('creationdate'))
\r
206 osqau.website = sxu.get('websiteurl', '')
\r
207 osqau.date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None
\r
208 osqau.location = sxu.get('location', '')
\r
209 osqau.real_name = sxu.get('realname', '')
\r
211 merged_users.append(osqau.id)
\r
214 user_by_name[osqau.username] = osqau
\r
216 openid = sxu.get('openid', None)
\r
217 if openid and openidre.match(openid):
\r
218 assoc = orm.AuthKeyUserAssociation(user=osqau, key=openid, provider="openidurl")
\r
221 if uidmapper[-1] == -1:
\r
224 return (uidmapper, merged_users)
\r
226 def tagsimport(dump, uidmap):
\r
227 tags = readTable(dump, "Tags")
\r
233 id = int(sxtag['id']),
\r
234 name = sxtag['name'],
\r
235 used_count = int(sxtag['count']),
\r
236 created_by_id = uidmap[sxtag.get('userid', 1)],
\r
240 tagmap[otag.name] = otag
\r
244 def postimport(dump, uidmap, tagmap):
\r
249 for h in readTable(dump, "PostHistory"):
\r
250 if not history.get(h.get('postid'), None):
\r
251 history[h.get('postid')] = []
\r
253 history[h.get('postid')].append(h)
\r
255 posts = readTable(dump, "Posts")
\r
257 for sxpost in posts:
\r
258 nodetype = (sxpost.get('posttypeid') == '1') and "nodetype" or "answer"
\r
261 node_type = nodetype,
\r
263 added_at = readTime(sxpost['creationdate']),
\r
264 body = sxpost['body'],
\r
265 score = sxpost.get('score', 0),
\r
266 author_id = sxpost.get('deletiondate', None) and 1 or uidmap[sxpost['owneruserid']]
\r
271 create_action = orm.Action(
\r
272 action_type = (nodetype == "nodetype") and "ask" or "answer",
\r
273 user_id = post.author_id,
\r
275 action_date = post.added_at
\r
278 create_action.save()
\r
280 #if sxpost.get('deletiondate', None):
\r
281 # delete_action = orm.Action(
\r
282 # action_type = "delete",
\r
285 # action_date = readTime(sxpost['deletiondate'])
\r
288 # delete_action.save()
\r
289 # post.deleted = delete_action
\r
291 if sxpost.get('lasteditoruserid', None):
\r
292 revise_action = orm.Action(
\r
293 action_type = "revise",
\r
294 user_id = uidmap[sxpost.get('lasteditoruserid')],
\r
296 action_date = readTime(sxpost['lasteditdate']),
\r
299 revise_action.save()
\r
300 post.last_edited = revise_action
\r
302 if sxpost.get('communityowneddate', None):
\r
305 wikify_action = orm.Action(
\r
306 action_type = "wikify",
\r
309 action_date = readTime(sxpost['communityowneddate'])
\r
312 wikify_action.save()
\r
315 if sxpost.get('lastactivityuserid', None):
\r
316 post.last_activity_by_id = uidmap[sxpost['lastactivityuserid']]
\r
317 post.last_activity_at = readTime(sxpost['lastactivitydate'])
\r
320 if sxpost.get('posttypeid') == '1': #question
\r
321 post.node_type = "question"
\r
322 post.title = sxpost['title']
\r
324 tagnames = sxpost['tags'].replace(u'ö', '-').replace(u'é', '').replace(u'à ', '')
\r
325 post.tagnames = tagnames
\r
327 post.extra_count = sxpost.get('viewcount', 0)
\r
329 #if sxpost.get('closeddate', None):
\r
330 # post.marked = True
\r
332 # close_action = orm.Action(
\r
333 # action_type = "close",
\r
336 # action_date = datetime.now() - timedelta(days=7)
\r
339 # close_action.save()
\r
340 # post.extra_action = close_action
\r
342 #if sxpost.get('acceptedanswerid', None):
\r
343 # accepted[int(sxpost.get('acceptedanswerid'))] = post
\r
348 post.parent_id = sxpost['parentid']
\r
350 #if int(post.id) in accepted:
\r
351 #post.marked = True
\r
353 #accept_action = orm.Action(
\r
354 # action_type = "acceptanswer",
\r
355 # user_id = accepted[int(post.id)].author_id,
\r
357 # action_date = datetime.now() - timedelta(days=7)
\r
360 #accept_action.save()
\r
363 #post.accepted_at = datetime.now()
\r
364 #post.accepted_by_id = accepted[int(post.id)].author_id
\r
366 #accepted[int(post.id)].extra_ref = post
\r
367 #accepted[int(post.id)].save()
\r
371 all[int(post.id)] = post
\r
375 def comment_import(dump, uidmap, posts):
\r
376 comments = readTable(dump, "PostComments")
\r
377 currid = max(posts.keys())
\r
380 for sxc in comments:
\r
384 node_type = "comment",
\r
385 added_at = readTime(sxc['creationdate']),
\r
386 author_id = uidmap[sxc.get('userid', 1)],
\r
387 body = sxc['text'],
\r
388 parent_id = sxc.get('postid'),
\r
391 if sxc.get('deletiondate', None):
\r
392 delete_action = orm.Action(
\r
393 action_type = "delete",
\r
394 user_id = uidmap[sxc['deletionuserid']],
\r
395 action_date = readTime(sxc['deletiondate'])
\r
398 oc.author_id = uidmap[sxc['deletionuserid']]
\r
401 delete_action.node = oc
\r
402 delete_action.save()
\r
404 oc.deleted = delete_action
\r
406 oc.author_id = uidmap[sxc.get('userid', 1)]
\r
409 create_action = orm.Action(
\r
410 action_type = "comment",
\r
411 user_id = oc.author_id,
\r
413 action_date = oc.added_at
\r
416 create_action.save()
\r
420 mapping[int(sxc['id'])] = int(oc.id)
\r
422 return posts, mapping
\r
425 def add_tags_to_posts(posts, tagmap):
\r
426 for post in posts.values():
\r
427 if post.node_type == "question":
\r
428 tags = [tag for tag in [tagmap.get(name.strip()) for name in post.tagnames.split(u' ') if name] if tag]
\r
429 post.tagnames = " ".join([t.name for t in tags]).strip()
\r
432 create_and_activate_revision(post)
\r
435 def create_and_activate_revision(post):
\r
436 rev = orm.NodeRevision(
\r
437 author_id = post.author_id,
\r
440 revised_at = post.added_at,
\r
442 summary = 'Initial revision',
\r
443 tagnames = post.tagnames,
\r
444 title = post.title,
\r
448 post.active_revision_id = rev.id
\r
451 def post_vote_import(dump, uidmap, posts):
\r
452 votes = readTable(dump, "Posts2Votes")
\r
453 close_reasons = dict([(r['id'], r['name']) for r in readTable(dump, "CloseReasons")])
\r
458 action = orm.Action(
\r
459 user_id=uidmap[sxv['userid']],
\r
460 action_date = readTime(sxv['creationdate']),
\r
463 node = posts.get(int(sxv['postid']), None)
\r
464 if not node: continue
\r
467 if sxv['votetypeid'] == '1':
\r
469 question = posts.get(int(answer.parent_id), None)
\r
471 action.action_type = "acceptanswer"
\r
474 answer.marked = True
\r
475 answer.extra_action = action
\r
477 question.extra_ref_id = answer.id
\r
482 elif sxv['votetypeid'] in ('2', '3'):
\r
483 if not (action.node.id, action.user_id) in user2vote:
\r
484 user2vote.append((action.node.id, action.user_id))
\r
486 action.action_type = (sxv['votetypeid'] == '2') and "voteup" or "votedown"
\r
490 node_id = action.node.id,
\r
491 user_id = action.user_id,
\r
492 voted_at = action.action_date,
\r
493 value = sxv['votetypeid'] == '2' and 1 or -1,
\r
498 action.action_type = "unknown"
\r
501 elif sxv['votetypeid'] in ('4', '12', '13'):
\r
502 action.action_type = "flag"
\r
506 node = action.node,
\r
507 user_id = action.user_id,
\r
508 flagged_at = action.action_date,
\r
515 elif sxv['votetypeid'] == '5':
\r
516 action.action_type = "favorite"
\r
519 elif sxv['votetypeid'] == '6':
\r
520 action.action_type = "close"
\r
521 action.extra = dbsafe_encode(close_reasons[sxv['comment']])
\r
525 node.extra_action = action
\r
528 elif sxv['votetypeid'] == '7':
\r
529 action.action_type = "unknown"
\r
532 node.marked = False
\r
533 node.extra_action = None
\r
536 elif sxv['votetypeid'] == '10':
\r
537 action.action_type = "delete"
\r
540 node.deleted = action
\r
543 elif sxv['votetypeid'] == '11':
\r
544 action.action_type = "unknown"
\r
547 node.deleted = None
\r
551 action.action_type = "unknown"
\r
555 if sxv.get('targetrepchange', None):
\r
556 rep = orm.ActionRepute(
\r
558 date = action.action_date,
\r
559 user_id = uidmap[sxv['targetuserid']],
\r
560 value = int(sxv['targetrepchange'])
\r
565 if sxv.get('voterrepchange', None):
\r
566 rep = orm.ActionRepute(
\r
568 date = action.action_date,
\r
569 user_id = uidmap[sxv['userid']],
\r
570 value = int(sxv['voterrepchange'])
\r
576 def comment_vote_import(dump, uidmap, comments, posts):
\r
577 votes = readTable(dump, "Comments2Votes")
\r
581 if sxv['votetypeid'] == "2":
\r
582 comment_id = comments[int(sxv['postcommentid'])]
\r
583 user_id = uidmap[sxv['userid']]
\r
585 if not (comment_id, user_id) in user2vote:
\r
586 user2vote.append((comment_id, user_id))
\r
588 action = orm.Action(
\r
589 action_type = "voteupcomment",
\r
591 action_date = readTime(sxv['creationdate']),
\r
592 node_id = comment_id
\r
597 node_id = comment_id,
\r
599 voted_at = action.action_date,
\r
606 posts[int(action.node_id)].score += 1
\r
607 posts[int(action.node_id)].save()
\r
611 def badges_import(dump, uidmap, post_list):
\r
612 node_ctype = orm['contenttypes.contenttype'].objects.get(name='node')
\r
613 obadges = dict([(b.cls, b) for b in orm.Badge.objects.all()])
\r
614 sxbadges = dict([(int(b['id']), b) for b in readTable(dump, "Badges")])
\r
615 user_badge_count = {}
\r
619 for id, sxb in sxbadges.items():
\r
620 cls = "".join(sxb['name'].replace('&', 'And').split(' '))
\r
623 sx_to_osqa[id] = obadges[cls]
\r
628 type = sxb['class']
\r
631 sx_to_osqa[id] = osqab
\r
633 sxawards = readTable(dump, "Users2Badges")
\r
636 for sxa in sxawards:
\r
637 badge = sx_to_osqa[int(sxa['badgeid'])]
\r
639 user_id = uidmap[sxa['userid']]
\r
640 if not user_badge_count.get(user_id, None):
\r
641 user_badge_count[user_id] = 0
\r
643 action = orm.Action(
\r
644 action_type = "award",
\r
646 action_date = readTime(sxa['date'])
\r
652 user_id = uidmap[sxa['userid']],
\r
654 node = post_list[user_badge_count[user_id]],
\r
655 awarded_at = action.action_date,
\r
660 badge.awarded_count += 1
\r
661 user_badge_count[user_id] += 1
\r
663 for badge in obadges.values():
\r
def reset_sequences():
    """Run the PG_SEQUENCE_RESETS statements when the backend is PostgreSQL.

    The statements are executed through South's ``db`` object inside one
    start/commit transaction pair. On any other backend nothing is done.
    """
    from south.db import db

    if db.backend_name != "postgres":
        return

    db.start_transaction()
    db.execute_many(PG_SEQUENCE_RESETS)
    db.commit_transaction()
\r
674 def sximport(dump, options):
\r
675 uidmap, merged_users = userimport(dump, options)
\r
676 tagmap = tagsimport(dump, uidmap)
\r
677 posts = postimport(dump, uidmap, tagmap)
\r
678 posts, comments = comment_import(dump, uidmap, posts)
\r
679 add_tags_to_posts(posts, tagmap)
\r
680 post_vote_import(dump, uidmap, posts)
\r
681 comment_vote_import(dump, uidmap, comments, posts)
\r
682 badges_import(dump, uidmap, posts.values())
\r
684 from south.db import db
\r
685 db.commit_transaction()
\r
691 PG_SEQUENCE_RESETS = """
\r
692 SELECT setval('"auth_user_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user";
\r
693 SELECT setval('"auth_user_groups_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_groups";
\r
694 SELECT setval('"auth_user_user_permissions_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_user_permissions";
\r
695 SELECT setval('"forum_keyvalue_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_keyvalue";
\r
696 SELECT setval('"forum_action_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_action";
\r
697 SELECT setval('"forum_actionrepute_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_actionrepute";
\r
698 SELECT setval('"forum_subscriptionsettings_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_subscriptionsettings";
\r
699 SELECT setval('"forum_validationhash_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_validationhash";
\r
700 SELECT setval('"forum_authkeyuserassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_authkeyuserassociation";
\r
701 SELECT setval('"forum_tag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_tag";
\r
702 SELECT setval('"forum_markedtag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_markedtag";
\r
703 SELECT setval('"forum_node_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node";
\r
704 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
705 SELECT setval('"forum_noderevision_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_noderevision";
\r
706 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
707 SELECT setval('"forum_questionsubscription_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_questionsubscription";
\r
708 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
709 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
710 SELECT setval('"forum_vote_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_vote";
\r
711 SELECT setval('"forum_flag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_flag";
\r
712 SELECT setval('"forum_badge_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_badge";
\r
713 SELECT setval('"forum_award_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_award";
\r
714 SELECT setval('"forum_openidnonce_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidnonce";
\r
715 SELECT setval('"forum_openidassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidassociation";
\r