1 # -*- coding: utf-8 -*-
\r
3 from xml.dom import minidom
\r
4 from datetime import datetime, timedelta
\r
7 from django.utils.translation import ugettext as _
\r
8 from django.template.defaultfilters import slugify
\r
9 from forum.models.utils import dbsafe_encode
\r
14 for node in el.childNodes:
\r
15 if node.nodeType == node.TEXT_NODE:
\r
19 msstrip = re.compile(r'^(.*)\.\d+')
\r
21 noms = msstrip.match(ts)
\r
25 return datetime(*time.strptime(ts, '%Y-%m-%dT%H:%M:%S')[0:6])
\r
28 return dict([(n.tagName.lower(), getText(n)) for n in el.childNodes if n.nodeType == el.ELEMENT_NODE])
\r
def readTable(dump, name):
    """Return the rows of table *name* from *dump* as a list.

    ``dump.read("<name>.xml")`` must return the XML text of the table.
    Every ``row`` element found in that document is converted with
    ``readEl`` and the results are returned in document order.
    """
    xml_text = dump.read("%s.xml" % name)
    document = minidom.parseString(xml_text)
    row_elements = document.getElementsByTagName('row')
    return [readEl(row) for row in row_elements]
\r
# URL prefixes of Google / Yahoo account OpenIDs.  final_username_attempt
# maps identifiers matching these to UnknownGoogleUser / UnknownYahooUser.
# The dots in the host names are escaped so they match a literal "." only;
# unescaped, "www.google.com" would also accept e.g. "wwwXgoogle.com".
google_accounts_lookup = re.compile(r'^https?://www\.google\.com/accounts/')
yahoo_accounts_lookup = re.compile(r'^https?://me\.yahoo\.com/a/')
\r
37 re.compile(r'^https?://www.google.com/profiles/(?P<uname>\w+(\.\w+)*)/?$'),
\r
38 re.compile(r'^https?://me.yahoo.com/(?P<uname>\w+(\.\w+)*)/?$'),
\r
39 re.compile(r'^https?://openid.aol.com/(?P<uname>\w+(\.\w+)*)/?$'),
\r
40 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).myopenid.com/?$'),
\r
41 re.compile(r'^https?://flickr.com/(\w+/)*(?P<uname>\w+(\.\w+)*)/?$'),
\r
42 re.compile(r'^https?://technorati.com/people/technorati/(?P<uname>\w+(\.\w+)*)/?$'),
\r
43 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).wordpress.com/?$'),
\r
44 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).blogspot.com/?$'),
\r
45 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).livejournal.com/?$'),
\r
46 re.compile(r'^https?://claimid.com/(?P<uname>\w+(\.\w+)*)/?$'),
\r
47 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).pip.verisignlabs.com/?$'),
\r
48 re.compile(r'^https?://getopenid.com/(?P<uname>\w+(\.\w+)*)/?$'),
\r
49 re.compile(r'^https?://[\w\.]+/(\w+/)*(?P<uname>\w+(\.\w+)*)/?$'),
\r
50 re.compile(r'^https?://(?P<uname>[\w\.]+)/?$'),
\r
def final_username_attempt(sxu):
    """Build a last-resort user name for the imported user row *sxu*.

    *sxu* is a dict-like row; its ``'openid'`` and ``'id'`` entries are read.

    Returns:
        * ``UnknownGoogleUser`` / ``UnknownYahooUser`` when the OpenID is a
          Google / Yahoo account URL;
        * the ``uname`` group of the first pattern in ``openid_lookups``
          that matches the OpenID;
        * ``UnknownUser`` when the row has no OpenID or nothing matches.
    """
    openid = sxu.get('openid', None)

    # A row without an OpenID cannot be matched against any pattern, and
    # handing None to a compiled regex raises TypeError, so skip straight
    # to the generic placeholder in that case.
    if openid:
        if google_accounts_lookup.search(openid):
            return UnknownGoogleUser(sxu.get('id'))
        if yahoo_accounts_lookup.search(openid):
            return UnknownYahooUser(sxu.get('id'))

        for lookup in openid_lookups:
            # Search once and reuse the match object.
            found = lookup.search(openid)
            if found:
                return found.group('uname')

    return UnknownUser(sxu.get('id'))
\r
68 class UnknownUser(object):
\r
69 def __init__(self, id):
\r
73 return _("user-%(id)s") % {'id': self._id}
\r
75 def __unicode__(self):
\r
76 return self.__str__()
\r
78 def encode(self, *args):
\r
79 return self.__str__()
\r
81 class UnknownGoogleUser(UnknownUser):
\r
83 return _("user-%(id)s (google)") % {'id': self._id}
\r
85 class UnknownYahooUser(UnknownUser):
\r
87 return _("user-%(id)s (yahoo)") % {'id': self._id}
\r
class IdMapper(dict):
    """Dictionary that maps one integer id to another.

    Keys and values are coerced to ``int`` when an entry is assigned.
    Item lookup coerces the key the same way, so ``mapper['5']`` and
    ``mapper[5]`` address the same entry.  Item lookup never raises:
    an id that was never stored, or a key that cannot be read as an
    integer, resolves to ``1``.
    """

    def __getitem__(self, key):
        # Entries are stored under int keys (see __setitem__), so the key
        # must be normalised before the lookup or string ids never match.
        try:
            key = int(key)
        except (TypeError, ValueError):
            return 1
        return super(IdMapper, self).get(key, 1)

    def __setitem__(self, key, value):
        super(IdMapper, self).__setitem__(int(key), int(value))
\r
# Matches strings that begin with an "http://" or "https://" scheme.
# userimport only stores an 'openid' value as an "openidurl" key when it
# matches.  Written as a raw string without backslashes: ':' and '/' need
# no escaping in a regex, and '\:' / '\/' are invalid escape sequences in a
# plain string literal.
openidre = re.compile(r'^https?://')
\r
99 def userimport(dump, options):
\r
100 users = readTable(dump, "Users")
\r
103 uidmapper = IdMapper()
\r
106 owneruid = options.get('owneruid', None)
\r
107 #check for empty values
\r
114 if sxu.get('id') == '-1':
\r
117 if int(sxu.get('id')) == int(owneruid):
\r
118 osqau = orm.User.objects.get(id=1)
\r
119 uidmapper[owneruid] = 1
\r
123 username = sxu.get('displayname',
\r
124 sxu.get('displaynamecleaned', sxu.get('realname', final_username_attempt(sxu))))
\r
126 if not isinstance(username, UnknownUser) and username in user_by_name:
\r
127 #if options.get('mergesimilar', False) and sxu.get('email', 'INVALID') == user_by_name[username].email:
\r
128 # osqau = user_by_name[username]
\r
130 # uidmapper[sxu.get('id')] = osqau.id
\r
133 while ("%s %d" % (username, inc)) in user_by_name:
\r
136 username = "%s %d" % (username, inc)
\r
138 sxbadges = sxu.get('badgesummary', None)
\r
139 badges = {'1':'0', '2':'0', '3':'0'}
\r
142 badges.update(dict([b.split('=') for b in sxbadges.split()]))
\r
146 id = sxu.get('id'),
\r
147 username = unicode(username),
\r
149 email = sxu.get('email', ''),
\r
150 is_superuser = sxu.get('usertypeid') == '5',
\r
151 is_staff = sxu.get('usertypeid') == '4',
\r
153 date_joined = readTime(sxu.get('creationdate')),
\r
154 last_seen = readTime(sxu.get('lastaccessdate')),
\r
155 about = sxu.get('aboutme', ''),
\r
156 date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None,
\r
157 email_isvalid = int(sxu.get('usertypeid')) > 2,
\r
158 website = sxu.get('websiteurl', ''),
\r
159 reputation = int(sxu.get('reputation')),
\r
160 gold = int(badges['1']),
\r
161 silver = int(badges['2']),
\r
162 bronze = int(badges['3']),
\r
163 real_name = sxu.get('realname', ''),
\r
164 location = sxu.get('location', ''),
\r
169 user_joins = orm.Action(
\r
170 action_type = "userjoins",
\r
171 action_date = osqau.date_joined,
\r
176 rep = orm.ActionRepute(
\r
179 date = osqau.date_joined,
\r
180 action = user_joins
\r
185 orm.SubscriptionSettings.objects.get(user=osqau)
\r
187 s = orm.SubscriptionSettings(user=osqau)
\r
190 uidmapper[osqau.id] = osqau.id
\r
192 new_about = sxu.get('aboutme', None)
\r
193 if new_about and osqau.about != new_about:
\r
195 osqau.about = "%s\n|\n%s" % (osqau.about, new_about)
\r
197 osqau.about = new_about
\r
199 osqau.username = sxu.get('displayname',
\r
200 sxu.get('displaynamecleaned', sxu.get('realname', final_username_attempt(sxu))))
\r
201 osqau.email = sxu.get('email', '')
\r
202 osqau.reputation += int(sxu.get('reputation'))
\r
203 osqau.gold += int(badges['1'])
\r
204 osqau.silver += int(badges['2'])
\r
205 osqau.bronze += int(badges['3'])
\r
207 osqau.date_joined = readTime(sxu.get('creationdate'))
\r
208 osqau.website = sxu.get('websiteurl', '')
\r
209 osqau.date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None
\r
210 osqau.location = sxu.get('location', '')
\r
211 osqau.real_name = sxu.get('realname', '')
\r
213 merged_users.append(osqau.id)
\r
216 user_by_name[osqau.username] = osqau
\r
218 openid = sxu.get('openid', None)
\r
219 if openid and openidre.match(openid):
\r
220 assoc = orm.AuthKeyUserAssociation(user=osqau, key=openid, provider="openidurl")
\r
223 if uidmapper[-1] == -1:
\r
226 return (uidmapper, merged_users)
\r
228 def tagsimport(dump, uidmap):
\r
229 tags = readTable(dump, "Tags")
\r
235 id = int(sxtag['id']),
\r
236 name = sxtag['name'],
\r
237 used_count = int(sxtag['count']),
\r
238 created_by_id = uidmap[sxtag.get('userid', 1)],
\r
242 tagmap[otag.name] = otag
\r
246 def add_post_state(name, post, action):
\r
247 if not "(%s)" % name in post.state_string:
\r
248 post.state_string = "%s(%s)" % (post.state_string, name)
\r
252 state = orm.NodeState.objects.get(node=post, state_type=name)
\r
253 state.action = action
\r
256 state = orm.NodeState(node=post, state_type=name, action=action)
\r
259 def remove_post_state(name, post):
\r
260 if "(%s)" % name in post.state_string:
\r
262 state = orm.NodeState.objects.get(state_type=name, post=post)
\r
266 post.state_string = "".join("(%s)" % s for s in re.findall('\w+', post.state_string) if s != name)
\r
268 def postimport(dump, uidmap, tagmap):
\r
273 for h in readTable(dump, "PostHistory"):
\r
274 if not history.get(h.get('postid'), None):
\r
275 history[h.get('postid')] = []
\r
277 history[h.get('postid')].append(h)
\r
279 posts = readTable(dump, "Posts")
\r
281 for sxpost in posts:
\r
282 nodetype = (sxpost.get('posttypeid') == '1') and "nodetype" or "answer"
\r
285 node_type = nodetype,
\r
287 added_at = readTime(sxpost['creationdate']),
\r
288 body = sxpost['body'],
\r
289 score = sxpost.get('score', 0),
\r
290 author_id = sxpost.get('deletiondate', None) and 1 or uidmap[sxpost.get('owneruserid', 1)]
\r
295 create_action = orm.Action(
\r
296 action_type = (nodetype == "nodetype") and "ask" or "answer",
\r
297 user_id = post.author_id,
\r
299 action_date = post.added_at
\r
302 create_action.save()
\r
304 if sxpost.get('lasteditoruserid', None):
\r
305 revise_action = orm.Action(
\r
306 action_type = "revise",
\r
307 user_id = uidmap[sxpost.get('lasteditoruserid')],
\r
309 action_date = readTime(sxpost['lasteditdate']),
\r
312 revise_action.save()
\r
313 post.last_edited = revise_action
\r
315 if sxpost.get('communityowneddate', None):
\r
316 wikify_action = orm.Action(
\r
317 action_type = "wikify",
\r
320 action_date = readTime(sxpost['communityowneddate'])
\r
323 wikify_action.save()
\r
324 add_post_state("wiki", post, wikify_action)
\r
326 if sxpost.get('lastactivityuserid', None):
\r
327 post.last_activity_by_id = uidmap[sxpost['lastactivityuserid']]
\r
328 post.last_activity_at = readTime(sxpost['lastactivitydate'])
\r
330 if sxpost.get('posttypeid') == '1': #question
\r
331 post.node_type = "question"
\r
332 post.title = sxpost['title']
\r
334 tagnames = sxpost['tags'].replace(u'ö', '-').replace(u'é', '').replace(u'à ', '')
\r
335 post.tagnames = tagnames
\r
337 post.extra_count = sxpost.get('viewcount', 0)
\r
340 post.parent_id = sxpost['parentid']
\r
344 all[int(post.id)] = post
\r
348 def comment_import(dump, uidmap, posts):
\r
349 comments = readTable(dump, "PostComments")
\r
350 currid = max(posts.keys())
\r
353 for sxc in comments:
\r
357 node_type = "comment",
\r
358 added_at = readTime(sxc['creationdate']),
\r
359 author_id = uidmap[sxc.get('userid', 1)],
\r
360 body = sxc['text'],
\r
361 parent_id = sxc.get('postid'),
\r
364 if sxc.get('deletiondate', None):
\r
365 delete_action = orm.Action(
\r
366 action_type = "delete",
\r
367 user_id = uidmap[sxc['deletionuserid']],
\r
368 action_date = readTime(sxc['deletiondate'])
\r
371 oc.author_id = uidmap[sxc['deletionuserid']]
\r
374 delete_action.node = oc
\r
375 delete_action.save()
\r
377 add_post_state("deleted", oc, delete_action)
\r
379 oc.author_id = uidmap[sxc.get('userid', 1)]
\r
382 create_action = orm.Action(
\r
383 action_type = "comment",
\r
384 user_id = oc.author_id,
\r
386 action_date = oc.added_at
\r
389 create_action.save()
\r
393 mapping[int(sxc['id'])] = int(oc.id)
\r
395 return posts, mapping
\r
398 def add_tags_to_posts(posts, tagmap):
\r
399 for post in posts.values():
\r
400 if post.node_type == "question":
\r
401 tags = [tag for tag in [tagmap.get(name.strip()) for name in post.tagnames.split(u' ') if name] if tag]
\r
402 post.tagnames = " ".join([t.name for t in tags]).strip()
\r
405 create_and_activate_revision(post)
\r
408 def create_and_activate_revision(post):
\r
409 rev = orm.NodeRevision(
\r
410 author_id = post.author_id,
\r
413 revised_at = post.added_at,
\r
415 summary = 'Initial revision',
\r
416 tagnames = post.tagnames,
\r
417 title = post.title,
\r
421 post.active_revision_id = rev.id
\r
424 def post_vote_import(dump, uidmap, posts):
\r
425 votes = readTable(dump, "Posts2Votes")
\r
426 close_reasons = dict([(r['id'], r['name']) for r in readTable(dump, "CloseReasons")])
\r
431 action = orm.Action(
\r
432 user_id=uidmap[sxv['userid']],
\r
433 action_date = readTime(sxv['creationdate']),
\r
436 node = posts.get(int(sxv['postid']), None)
\r
437 if not node: continue
\r
440 if sxv['votetypeid'] == '1':
\r
442 question = posts.get(int(answer.parent_id), None)
\r
444 action.action_type = "acceptanswer"
\r
447 answer.marked = True
\r
449 question.extra_ref_id = answer.id
\r
454 elif sxv['votetypeid'] in ('2', '3'):
\r
455 if not (action.node.id, action.user_id) in user2vote:
\r
456 user2vote.append((action.node.id, action.user_id))
\r
458 action.action_type = (sxv['votetypeid'] == '2') and "voteup" or "votedown"
\r
462 node_id = action.node.id,
\r
463 user_id = action.user_id,
\r
464 voted_at = action.action_date,
\r
465 value = sxv['votetypeid'] == '2' and 1 or -1,
\r
470 action.action_type = "unknown"
\r
473 elif sxv['votetypeid'] in ('4', '12', '13'):
\r
474 action.action_type = "flag"
\r
478 node = action.node,
\r
479 user_id = action.user_id,
\r
480 flagged_at = action.action_date,
\r
487 elif sxv['votetypeid'] == '5':
\r
488 action.action_type = "favorite"
\r
491 elif sxv['votetypeid'] == '6':
\r
492 action.action_type = "close"
\r
493 action.extra = dbsafe_encode(close_reasons[sxv['comment']])
\r
499 elif sxv['votetypeid'] == '7':
\r
500 action.action_type = "unknown"
\r
503 node.marked = False
\r
506 remove_post_state("closed", node)
\r
508 elif sxv['votetypeid'] == '10':
\r
509 action.action_type = "delete"
\r
512 elif sxv['votetypeid'] == '11':
\r
513 action.action_type = "unknown"
\r
516 remove_post_state("deleted", node)
\r
519 action.action_type = "unknown"
\r
522 if sxv.get('targetrepchange', None):
\r
523 rep = orm.ActionRepute(
\r
525 date = action.action_date,
\r
526 user_id = uidmap[sxv['targetuserid']],
\r
527 value = int(sxv['targetrepchange'])
\r
532 if sxv.get('voterrepchange', None):
\r
533 rep = orm.ActionRepute(
\r
535 date = action.action_date,
\r
536 user_id = uidmap[sxv['userid']],
\r
537 value = int(sxv['voterrepchange'])
\r
542 if action.action_type in ("acceptanswer", "delete", "close"):
\r
543 state = {"acceptanswer": "accepted", "delete": "deleted", "close": "closed"}[action.action_type]
\r
544 add_post_state(state, node, action)
\r
547 def comment_vote_import(dump, uidmap, comments, posts):
\r
548 votes = readTable(dump, "Comments2Votes")
\r
552 if sxv['votetypeid'] == "2":
\r
553 comment_id = comments[int(sxv['postcommentid'])]
\r
554 user_id = uidmap[sxv['userid']]
\r
556 if not (comment_id, user_id) in user2vote:
\r
557 user2vote.append((comment_id, user_id))
\r
559 action = orm.Action(
\r
560 action_type = "voteupcomment",
\r
562 action_date = readTime(sxv['creationdate']),
\r
563 node_id = comment_id
\r
568 node_id = comment_id,
\r
570 voted_at = action.action_date,
\r
577 posts[int(action.node_id)].score += 1
\r
578 posts[int(action.node_id)].save()
\r
581 def badges_import(dump, uidmap, post_list):
\r
582 node_ctype = orm['contenttypes.contenttype'].objects.get(name='node')
\r
583 obadges = dict([(b.cls, b) for b in orm.Badge.objects.all()])
\r
584 sxbadges = dict([(int(b['id']), b) for b in readTable(dump, "Badges")])
\r
585 user_badge_count = {}
\r
589 for id, sxb in sxbadges.items():
\r
590 cls = "".join(sxb['name'].replace('&', 'And').split(' '))
\r
593 sx_to_osqa[id] = obadges[cls]
\r
598 type = sxb['class']
\r
601 sx_to_osqa[id] = osqab
\r
603 sxawards = readTable(dump, "Users2Badges")
\r
606 for sxa in sxawards:
\r
607 badge = sx_to_osqa[int(sxa['badgeid'])]
\r
609 user_id = uidmap[sxa['userid']]
\r
610 if not user_badge_count.get(user_id, None):
\r
611 user_badge_count[user_id] = 0
\r
613 action = orm.Action(
\r
614 action_type = "award",
\r
616 action_date = readTime(sxa['date'])
\r
622 user_id = uidmap[sxa['userid']],
\r
624 node = post_list[user_badge_count[user_id]],
\r
625 awarded_at = action.action_date,
\r
630 badge.awarded_count += 1
\r
631 user_badge_count[user_id] += 1
\r
633 for badge in obadges.values():
\r
def reset_sequences():
    """Execute PG_SEQUENCE_RESETS when South reports a "postgres" backend.

    The statements are run between ``db.start_transaction()`` and
    ``db.commit_transaction()``.  For any other backend name the function
    returns without touching the database.
    """
    from south.db import db

    if db.backend_name != "postgres":
        return

    db.start_transaction()
    db.execute_many(PG_SEQUENCE_RESETS)
    db.commit_transaction()
\r
644 def sximport(dump, options):
\r
645 uidmap, merged_users = userimport(dump, options)
\r
646 tagmap = tagsimport(dump, uidmap)
\r
647 posts = postimport(dump, uidmap, tagmap)
\r
648 posts, comments = comment_import(dump, uidmap, posts)
\r
649 add_tags_to_posts(posts, tagmap)
\r
650 post_vote_import(dump, uidmap, posts)
\r
651 comment_vote_import(dump, uidmap, comments, posts)
\r
652 badges_import(dump, uidmap, posts.values())
\r
654 from south.db import db
\r
655 db.commit_transaction()
\r
660 PG_SEQUENCE_RESETS = """
\r
661 SELECT setval('"auth_user_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user";
\r
662 SELECT setval('"auth_user_groups_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_groups";
\r
663 SELECT setval('"auth_user_user_permissions_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_user_permissions";
\r
664 SELECT setval('"forum_keyvalue_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_keyvalue";
\r
665 SELECT setval('"forum_action_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_action";
\r
666 SELECT setval('"forum_actionrepute_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_actionrepute";
\r
667 SELECT setval('"forum_subscriptionsettings_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_subscriptionsettings";
\r
668 SELECT setval('"forum_validationhash_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_validationhash";
\r
669 SELECT setval('"forum_authkeyuserassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_authkeyuserassociation";
\r
670 SELECT setval('"forum_tag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_tag";
\r
671 SELECT setval('"forum_markedtag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_markedtag";
\r
672 SELECT setval('"forum_node_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node";
\r
673 SELECT setval('"forum_nodestate_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_nodestate";
\r
674 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
675 SELECT setval('"forum_noderevision_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_noderevision";
\r
676 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
677 SELECT setval('"forum_questionsubscription_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_questionsubscription";
\r
678 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
679 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
680 SELECT setval('"forum_vote_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_vote";
\r
681 SELECT setval('"forum_flag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_flag";
\r
682 SELECT setval('"forum_badge_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_badge";
\r
683 SELECT setval('"forum_award_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_award";
\r
684 SELECT setval('"forum_openidnonce_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidnonce";
\r
685 SELECT setval('"forum_openidassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidassociation";
\r