1 # -*- coding: utf-8 -*-
3 from xml.dom import minidom
4 from datetime import datetime, timedelta
7 from django.utils.translation import ugettext as _
8 from django.template.defaultfilters import slugify
9 from forum.models.utils import dbsafe_encode
12 from django.utils.encoding import force_unicode
15 from cPickle import loads, dumps
17 from pickle import loads, dumps
19 from copy import deepcopy
20 from base64 import b64encode, b64decode
21 from zlib import compress, decompress
def dbsafe_encode(value):
    """Serialize ``value`` into a text-safe unicode string.

    A deep copy of ``value`` is pickled, zlib-compressed and base64-encoded;
    the encoded result is then passed through ``force_unicode``.
    """
    snapshot = deepcopy(value)
    pickled = dumps(snapshot)
    packed = compress(pickled)
    encoded = b64encode(packed)
    return force_unicode(encoded)
28 for node in el.childNodes:
29 if node.nodeType == node.TEXT_NODE:
33 msstrip = re.compile(r'^(.*)\.\d+')
35 noms = msstrip.match(ts)
39 return datetime(*time.strptime(ts, '%Y-%m-%dT%H:%M:%S')[0:6])
42 return dict([(n.tagName.lower(), getText(n)) for n in el.childNodes if n.nodeType == el.ELEMENT_NODE])
def readTable(dump, name):
    """Return the rows of ``<name>.xml`` from ``dump`` as a list.

    ``dump.read("<name>.xml")`` supplies the XML text; each ``row`` element
    of the parsed document is converted with ``readEl``.
    """
    xml_text = dump.read("%s.xml" % name)
    document = minidom.parseString(xml_text)
    rows = []
    for row_el in document.getElementsByTagName('row'):
        rows.append(readEl(row_el))
    return rows
# Patterns for OpenID URLs that start with the Google accounts prefix or the
# Yahoo "me.yahoo.com/a/" prefix. The dots are escaped so only the literal
# host names match; an unescaped "." would accept any character there.
google_accounts_lookup = re.compile(r'^https?://www\.google\.com/accounts/')
yahoo_accounts_lookup = re.compile(r'^https?://me\.yahoo\.com/a/')
51 re.compile(r'^https?://www.google.com/profiles/(?P<uname>\w+(\.\w+)*)/?$'),
52 re.compile(r'^https?://me.yahoo.com/(?P<uname>\w+(\.\w+)*)/?$'),
53 re.compile(r'^https?://openid.aol.com/(?P<uname>\w+(\.\w+)*)/?$'),
54 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).myopenid.com/?$'),
55 re.compile(r'^https?://flickr.com/(\w+/)*(?P<uname>\w+(\.\w+)*)/?$'),
56 re.compile(r'^https?://technorati.com/people/technorati/(?P<uname>\w+(\.\w+)*)/?$'),
57 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).wordpress.com/?$'),
58 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).blogspot.com/?$'),
59 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).livejournal.com/?$'),
60 re.compile(r'^https?://claimid.com/(?P<uname>\w+(\.\w+)*)/?$'),
61 re.compile(r'^https?://(?P<uname>\w+(\.\w+)*).pip.verisignlabs.com/?$'),
62 re.compile(r'^https?://getopenid.com/(?P<uname>\w+(\.\w+)*)/?$'),
63 re.compile(r'^https?://[\w\.]+/(\w+/)*(?P<uname>\w+(\.\w+)*)/?$'),
64 re.compile(r'^https?://(?P<uname>[\w\.]+)/?$'),
def final_username_attempt(sxu):
    """Derive a last-resort user name from the user's OpenID URL.

    ``sxu`` is a dict-like user row; its ``openid`` and ``id`` entries are
    read. URLs matching the Google or Yahoo account patterns yield an
    ``UnknownGoogleUser`` / ``UnknownYahooUser`` placeholder. Otherwise the
    first pattern in ``openid_lookups`` that matches supplies its ``uname``
    group as the name. When nothing matches, or the row has no OpenID, an
    ``UnknownUser`` placeholder built from the row id is returned.
    """
    openid = sxu.get('openid', None)

    # Without an OpenID URL there is nothing to match; searching ``None``
    # with a compiled pattern would raise ``TypeError``.
    if openid:
        if google_accounts_lookup.search(openid):
            return UnknownGoogleUser(sxu.get('id'))
        if yahoo_accounts_lookup.search(openid):
            return UnknownYahooUser(sxu.get('id'))

        for lookup in openid_lookups:
            # Search once and reuse the match object.
            match = lookup.search(openid)
            if match:
                return match.group('uname')

    return UnknownUser(sxu.get('id'))
# Placeholder object used in place of a user name when none could be derived.
# The constructor takes the source user id; the label below reads it back
# from self._id (the assignment itself is not visible here).
# NOTE(review): several lines of this class are missing from this excerpt
# (the inner numbering jumps), so method headers and bodies do not line up.
82 class UnknownUser(object):
83 def __init__(self, id):
# Translated label of the form "user-<id>".
87 return _("user-%(id)s") % {'id': self._id}
89 def __unicode__(self):
92 def encode(self, *args):
# UnknownUser variant whose translated label carries a "(google)" suffix.
# NOTE(review): the header of the method containing the return is not visible.
95 class UnknownGoogleUser(UnknownUser):
97 return _("user-%(id)s (google)") % {'id': self._id}
# UnknownUser variant whose translated label carries a "(yahoo)" suffix.
# NOTE(review): the header of the method containing the return is not visible.
99 class UnknownYahooUser(UnknownUser):
101 return _("user-%(id)s (yahoo)") % {'id': self._id}
class IdMapper(dict):
    """Dict of id mappings with integer-normalized keys and a default of 1.

    Keys and values are coerced to ``int`` on assignment and keys are coerced
    the same way on lookup, so ``mapper['5']`` and ``mapper[5]`` address the
    same entry. Looking up a key that was never stored returns ``1`` instead
    of raising ``KeyError``.
    """

    def __getitem__(self, key):
        # Normalize exactly like __setitem__; otherwise string ids read from
        # the dump would never match the stored integer keys.
        return super(IdMapper, self).get(int(key), 1)

    def __setitem__(self, key, value):
        super(IdMapper, self).__setitem__(int(key), int(value))
# Matches values that start with "http://" or "https://". A raw string keeps
# the pattern text identical while avoiding invalid "\:" and "\/" string
# escapes in the literal.
openidre = re.compile(r'^https?\:\/\/')
# Import the "Users" table of the dump into orm.User records.
#
# dump    -- object handed to readTable, which calls dump.read("<name>.xml").
# options -- mapping; 'owneruid' is the only key read in the visible lines.
# Returns the tuple (uidmapper, merged_users).
# NOTE(review): many lines of this function are missing from this excerpt
# (the inner numbering jumps); the comments describe the visible lines only.
113 def userimport(dump, options):
114 users = readTable(dump, "Users")
117 uidmapper = IdMapper()
120 owneruid = options.get('owneruid', None)
121 #check for empty values
# Ids arrive as strings; the row with id '-1' is special-cased.
128 if sxu.get('id') == '-1':
# The row whose id equals owneruid is bound to the existing user with id 1.
131 if int(sxu.get('id')) == int(owneruid):
132 osqau = orm.User.objects.get(id=1)
133 uidmapper[owneruid] = 1
# Name fallback chain: displayname, displaynamecleaned, realname, then
# final_username_attempt(sxu). The innermost call is evaluated eagerly
# because it is passed as a dict.get() default.
137 username = sxu.get('displayname',
138 sxu.get('displaynamecleaned', sxu.get('realname', final_username_attempt(sxu))))
140 if not isinstance(username, UnknownUser) and username in user_by_name:
141 #if options.get('mergesimilar', False) and sxu.get('email', 'INVALID') == user_by_name[username].email:
142 # osqau = user_by_name[username]
144 # uidmapper[sxu.get('id')] = osqau.id
# Loops while "<username> <inc>" is already taken, then adopts that form.
147 while ("%s %d" % (username, inc)) in user_by_name:
150 username = "%s %d" % (username, inc)
# badgesummary is parsed as whitespace-separated "key=value" pairs that
# override the '0' defaults for keys '1', '2' and '3'.
152 sxbadges = sxu.get('badgesummary', None)
153 badges = {'1':'0', '2':'0', '3':'0'}
156 badges.update(dict([b.split('=') for b in sxbadges.split()]))
161 username = unicode(username),
163 email = sxu.get('email', ''),
# usertypeid '5' marks a superuser, '4' marks staff.
164 is_superuser = sxu.get('usertypeid') == '5',
165 is_staff = sxu.get('usertypeid') == '4',
167 date_joined = readTime(sxu.get('creationdate')),
168 last_seen = readTime(sxu.get('lastaccessdate')),
169 about = sxu.get('aboutme', ''),
170 date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None,
# Any usertypeid above 2 counts as a validated e-mail address.
171 email_isvalid = int(sxu.get('usertypeid')) > 2,
172 website = sxu.get('websiteurl', ''),
173 reputation = int(sxu.get('reputation')),
# Badge keys '1', '2' and '3' feed the gold, silver and bronze counters.
174 gold = int(badges['1']),
175 silver = int(badges['2']),
176 bronze = int(badges['3']),
177 real_name = sxu.get('realname', ''),
178 location = sxu.get('location', ''),
# A "userjoins" action and a repute entry are dated at the join date.
183 user_joins = orm.Action(
184 action_type = "userjoins",
185 action_date = osqau.date_joined,
190 rep = orm.ActionRepute(
193 date = osqau.date_joined,
# Subscription settings are looked up and, on another path, created.
199 orm.SubscriptionSettings.objects.get(user=osqau)
201 s = orm.SubscriptionSettings(user=osqau)
204 uidmapper[osqau.id] = osqau.id
# The lines below update an already existing user from the dump row.
# A differing "about" text is either joined to the old one with "\n|\n" or
# replaces it (the branch condition between the two is not visible).
206 new_about = sxu.get('aboutme', None)
207 if new_about and osqau.about != new_about:
209 osqau.about = "%s\n|\n%s" % (osqau.about, new_about)
211 osqau.about = new_about
213 osqau.username = sxu.get('displayname',
214 sxu.get('displaynamecleaned', sxu.get('realname', final_username_attempt(sxu))))
215 osqau.email = sxu.get('email', '')
# Reputation and badge counts from the dump are added to the current values.
216 osqau.reputation += int(sxu.get('reputation'))
217 osqau.gold += int(badges['1'])
218 osqau.silver += int(badges['2'])
219 osqau.bronze += int(badges['3'])
221 osqau.date_joined = readTime(sxu.get('creationdate'))
222 osqau.website = sxu.get('websiteurl', '')
223 osqau.date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None
224 osqau.location = sxu.get('location', '')
225 osqau.real_name = sxu.get('realname', '')
227 merged_users.append(osqau.id)
230 user_by_name[osqau.username] = osqau
# Only values starting with http:// or https:// (openidre) are stored as an
# "openidurl" key association.
232 openid = sxu.get('openid', None)
233 if openid and openidre.match(openid):
234 assoc = orm.AuthKeyUserAssociation(user=osqau, key=openid, provider="openidurl")
# IdMapper answers 1 for unmapped keys, so -1 here means id -1 was mapped
# to itself.
237 if uidmapper[-1] == -1:
240 return (uidmapper, merged_users)
# Import the "Tags" table of the dump; tagmap is filled keyed by tag name.
# NOTE(review): parts of this function are missing from this excerpt.
242 def tagsimport(dump, uidmap):
243 tags = readTable(dump, "Tags")
249 id = int(sxtag['id']),
250 name = sxtag['name'],
251 used_count = int(sxtag['count']),
# Rows without a userid fall back to the uidmap entry for id 1.
252 created_by_id = uidmap[sxtag.get('userid', 1)],
256 tagmap[otag.name] = otag
# Put `post` into state `name`, associating `action` with that state.
# The state is tracked as a "(name)" token in post.state_string and as an
# orm.NodeState row.
# NOTE(review): lines between the visible statements are missing from this
# excerpt, so the branch structure around the two NodeState statements is
# not shown.
260 def add_post_state(name, post, action):
# Append the token only when it is not already in the state string.
261 if not "(%s)" % name in post.state_string:
262 post.state_string = "%s(%s)" % (post.state_string, name)
# An existing NodeState for (post, name) gets its action replaced ...
266 state = orm.NodeState.objects.get(node=post, state_type=name)
267 state.action = action
# ... and this statement builds a new NodeState instead.
270 state = orm.NodeState(node=post, state_type=name, action=action)
# Take `post` out of state `name`.
# NOTE(review): parts of this function are missing from this excerpt.
273 def remove_post_state(name, post):
274 if "(%s)" % name in post.state_string:
# NOTE(review): this lookup filters on post=..., while add_post_state uses
# node=... for the same model; confirm the actual NodeState field name.
276 state = orm.NodeState.objects.get(state_type=name, post=post)
# Rebuild the state string from every word token except `name`.
280 post.state_string = "".join("(%s)" % s for s in re.findall('\w+', post.state_string) if s != name)
# Import the "Posts" table of the dump (questions and answers) as nodes.
# NOTE(review): many lines of this function are missing from this excerpt;
# the comments describe the visible lines only.
282 def postimport(dump, uidmap, tagmap):
# Group the PostHistory rows by their postid.
287 for h in readTable(dump, "PostHistory"):
288 if not history.get(h.get('postid'), None):
289 history[h.get('postid')] = []
291 history[h.get('postid')].append(h)
293 posts = readTable(dump, "Posts")
# posttypeid '1' yields the literal "nodetype" here (node_type is set to
# "question" further down); every other type is treated as "answer".
296 nodetype = (sxpost.get('posttypeid') == '1') and "nodetype" or "answer"
299 node_type = nodetype,
301 added_at = readTime(sxpost['creationdate']),
302 body = sxpost['body'],
303 score = sxpost.get('score', 0),
# Posts carrying a deletiondate get author id 1; others use the mapped owner.
304 author_id = sxpost.get('deletiondate', None) and 1 or uidmap[sxpost.get('owneruserid', 1)]
309 create_action = orm.Action(
310 action_type = (nodetype == "nodetype") and "ask" or "answer",
311 user_id = post.author_id,
313 action_date = post.added_at
# A "revise" action is recorded only when a last editor is present.
318 if sxpost.get('lasteditoruserid', None):
319 revise_action = orm.Action(
320 action_type = "revise",
321 user_id = uidmap[sxpost.get('lasteditoruserid')],
323 action_date = readTime(sxpost['lasteditdate']),
327 post.last_edited = revise_action
# A communityowneddate adds the "wiki" state through a "wikify" action.
329 if sxpost.get('communityowneddate', None):
330 wikify_action = orm.Action(
331 action_type = "wikify",
334 action_date = readTime(sxpost['communityowneddate'])
338 add_post_state("wiki", post, wikify_action)
340 if sxpost.get('lastactivityuserid', None):
341 post.last_activity_by_id = uidmap[sxpost['lastactivityuserid']]
342 post.last_activity_at = readTime(sxpost['lastactivitydate'])
344 if sxpost.get('posttypeid') == '1': #question
345 post.node_type = "question"
346 post.title = sxpost['title']
# Specific non-ASCII character sequences are replaced or dropped from the
# tag string before it is stored.
348 tagnames = sxpost['tags'].replace(u'ö', '-').replace(u'é', '').replace(u'à ', '')
349 post.tagnames = tagnames
# The view count is stored in extra_count.
351 post.extra_count = sxpost.get('viewcount', 0)
354 post.parent_id = sxpost['parentid']
# Created posts are collected keyed by integer post id.
358 all[int(post.id)] = post
# Import the "PostComments" table of the dump as "comment" nodes.
# Returns (posts, mapping); mapping goes from the dump's comment id to the
# id of the node created for it.
# NOTE(review): many lines of this function are missing from this excerpt.
362 def comment_import(dump, uidmap, posts):
363 comments = readTable(dump, "PostComments")
# Highest existing post id; presumably the base for new comment ids -- TODO
# confirm. max() raises ValueError when posts is empty.
364 currid = max(posts.keys())
371 node_type = "comment",
372 added_at = readTime(sxc['creationdate']),
373 author_id = uidmap[sxc.get('userid', 1)],
375 parent_id = sxc.get('postid'),
# Deleted comments get a "delete" action and the "deleted" state; in that
# path the author is set to the deleting user.
378 if sxc.get('deletiondate', None):
379 delete_action = orm.Action(
380 action_type = "delete",
381 user_id = uidmap[sxc['deletionuserid']],
382 action_date = readTime(sxc['deletiondate'])
385 oc.author_id = uidmap[sxc['deletionuserid']]
388 delete_action.node = oc
391 add_post_state("deleted", oc, delete_action)
393 oc.author_id = uidmap[sxc.get('userid', 1)]
396 create_action = orm.Action(
397 action_type = "comment",
398 user_id = oc.author_id,
400 action_date = oc.added_at
407 mapping[int(sxc['id'])] = int(oc.id)
409 return posts, mapping
# Resolve the tag names of every question in `posts` against `tagmap` and
# create a revision for each post.
# NOTE(review): lines between the visible statements are missing from this
# excerpt, so the exact nesting of the last call is not shown.
412 def add_tags_to_posts(posts, tagmap):
413 for post in posts.values():
414 if post.node_type == "question":
# Names are split on single spaces; names absent from tagmap are dropped.
415 tags = [tag for tag in [tagmap.get(name.strip()) for name in post.tagnames.split(u' ') if name] if tag]
# tagnames is rewritten from the tags that were actually found.
416 post.tagnames = " ".join([t.name for t in tags]).strip()
419 create_and_activate_revision(post)
# Create an 'Initial revision' NodeRevision from the post's own author,
# creation time and tag names, and make it the post's active revision.
# NOTE(review): some constructor arguments and any save calls are missing
# from this excerpt.
422 def create_and_activate_revision(post):
423 rev = orm.NodeRevision(
424 author_id = post.author_id,
427 revised_at = post.added_at,
429 summary = 'Initial revision',
430 tagnames = post.tagnames,
435 post.active_revision_id = rev.id
# Import the "Posts2Votes" table of the dump as actions on posts.
#
# votetypeid handling in the visible lines:
#   '1'              -> "acceptanswer"
#   '2' / '3'        -> "voteup" / "votedown", or "unknown" on another path
#   '4', '12', '13'  -> "flag"
#   '5'              -> "favorite"
#   '6'              -> "close" (extra holds the encoded close reason name)
#   '7'              -> "unknown", and the "closed" state is removed
#   '10'             -> "delete"
#   '11'             -> "unknown", and the "deleted" state is removed
#   anything else    -> "unknown"
# NOTE(review): many lines of this function are missing from this excerpt.
438 def post_vote_import(dump, uidmap, posts):
439 votes = readTable(dump, "Posts2Votes")
# Close reasons are looked up by id and resolve to their name.
440 close_reasons = dict([(r['id'], r['name']) for r in readTable(dump, "CloseReasons")])
446 user_id=uidmap[sxv['userid']],
447 action_date = readTime(sxv['creationdate']),
# Votes that reference a post missing from `posts` are skipped.
450 node = posts.get(int(sxv['postid']), None)
451 if not node: continue
454 if sxv['votetypeid'] == '1':
456 question = posts.get(int(answer.parent_id), None)
458 action.action_type = "acceptanswer"
# The question records the accepted answer's id in extra_ref_id.
463 question.extra_ref_id = answer.id
468 elif sxv['votetypeid'] in ('2', '3'):
# A vote is recorded for a (node, user) pair not seen before.
# NOTE(review): user2vote supports append(), so this membership test looks
# like a linear scan of a list -- confirm.
469 if not (action.node.id, action.user_id) in user2vote:
470 user2vote.append((action.node.id, action.user_id))
472 action.action_type = (sxv['votetypeid'] == '2') and "voteup" or "votedown"
476 node_id = action.node.id,
477 user_id = action.user_id,
478 voted_at = action.action_date,
479 value = sxv['votetypeid'] == '2' and 1 or -1,
484 action.action_type = "unknown"
487 elif sxv['votetypeid'] in ('4', '12', '13'):
488 action.action_type = "flag"
493 user_id = action.user_id,
494 flagged_at = action.action_date,
501 elif sxv['votetypeid'] == '5':
502 action.action_type = "favorite"
505 elif sxv['votetypeid'] == '6':
506 action.action_type = "close"
# sxv['comment'] is used as the close reason id; an id missing from
# close_reasons raises KeyError.
507 action.extra = dbsafe_encode(close_reasons[sxv['comment']])
513 elif sxv['votetypeid'] == '7':
514 action.action_type = "unknown"
520 remove_post_state("closed", node)
522 elif sxv['votetypeid'] == '10':
523 action.action_type = "delete"
526 elif sxv['votetypeid'] == '11':
527 action.action_type = "unknown"
530 remove_post_state("deleted", node)
533 action.action_type = "unknown"
# Reputation changes for the target user and for the voter are recorded as
# separate ActionRepute entries dated like the action.
536 if sxv.get('targetrepchange', None):
537 rep = orm.ActionRepute(
539 date = action.action_date,
540 user_id = uidmap[sxv['targetuserid']],
541 value = int(sxv['targetrepchange'])
546 if sxv.get('voterrepchange', None):
547 rep = orm.ActionRepute(
549 date = action.action_date,
550 user_id = uidmap[sxv['userid']],
551 value = int(sxv['voterrepchange'])
# These three action types also set the matching state on the post.
556 if action.action_type in ("acceptanswer", "delete", "close"):
557 state = {"acceptanswer": "accepted", "delete": "deleted", "close": "closed"}[action.action_type]
558 add_post_state(state, node, action)
# Import the "Comments2Votes" table of the dump as up-votes on comments.
# NOTE(review): many lines of this function are missing from this excerpt.
561 def comment_vote_import(dump, uidmap, comments, posts):
562 votes = readTable(dump, "Comments2Votes")
# votetypeid "2" is the only type handled in the visible lines.
566 if sxv['votetypeid'] == "2":
# `comments` maps the dump's comment id to an imported comment id.
567 comment_id = comments[int(sxv['postcommentid'])]
568 user_id = uidmap[sxv['userid']]
# A (comment, user) pair is recorded the first time it is seen.
570 if not (comment_id, user_id) in user2vote:
571 user2vote.append((comment_id, user_id))
574 action_type = "voteupcomment",
576 action_date = readTime(sxv['creationdate']),
582 node_id = comment_id,
584 voted_at = action.action_date,
# The voted comment is looked up in `posts` by node id; its score is bumped
# and saved.
591 posts[int(action.node_id)].score += 1
592 posts[int(action.node_id)].save()
# Import the "Badges" and "Users2Badges" tables of the dump.
# NOTE(review): many lines of this function are missing from this excerpt.
595 def badges_import(dump, uidmap, post_list):
596 node_ctype = orm['contenttypes.contenttype'].objects.get(name='node')
# Existing badges keyed by their cls; dump badges keyed by integer id.
597 obadges = dict([(b.cls, b) for b in orm.Badge.objects.all()])
598 sxbadges = dict([(int(b['id']), b) for b in readTable(dump, "Badges")])
599 user_badge_count = {}
603 for id, sxb in sxbadges.items():
# Class key: the badge name with '&' replaced by 'And' and spaces removed.
604 cls = "".join(sxb['name'].replace('&', 'And').split(' '))
# sx_to_osqa maps a dump badge id either to an existing badge with that cls
# or to `osqab`, whose construction is not visible here.
607 sx_to_osqa[id] = obadges[cls]
615 sx_to_osqa[id] = osqab
617 sxawards = readTable(dump, "Users2Badges")
621 badge = sx_to_osqa[int(sxa['badgeid'])]
623 user_id = uidmap[sxa['userid']]
624 if not user_badge_count.get(user_id, None):
625 user_badge_count[user_id] = 0
628 action_type = "award",
630 action_date = readTime(sxa['date'])
636 user_id = uidmap[sxa['userid']],
# The award's node is taken from post_list at the user's running award count.
# NOTE(review): this indexes past the end once a user has more awards than
# post_list has entries -- confirm how that case is handled.
638 node = post_list[user_badge_count[user_id]],
639 awarded_at = action.action_date,
644 badge.awarded_count += 1
645 user_badge_count[user_id] += 1
647 for badge in obadges.values():
# Import the "FlatPages" table of the dump as page nodes and store a
# path -> page id registry under the 'STATIC_PAGE_REGISTRY' key.
# NOTE(review): many lines of this function are missing from this excerpt.
650 def pages_import(dump):
652 sx_pages = readTable(dump, "FlatPages")
# The page body is the base64-decoded 'value' field.
658 body = b64decode(sxp['value']),
659 extra = dbsafe_encode({
# The path is the url with its first character removed.
660 'path': sxp['url'][1:],
661 'mimetype': sxp['contenttype'],
662 'template': (sxp['usemaster'] == "true") and "default" or "none",
665 'sidebar_wrap': True,
666 'sidebar_render': "html",
673 registry[sxp['url'][1:]] = page.id
675 create_action = orm.Action(
676 action_type = "newpage",
677 user_id = page.author_id,
# Only active text/html pages get a "publish" action and "published" state.
683 if sxp['active'] == "true" and sxp['contenttype'] == "text/html":
684 pub_action = orm.Action(
685 action_type = "publish",
686 user_id = page.author_id,
691 add_post_state("published", page, pub_action)
693 kv = orm.KeyValue(key='STATIC_PAGE_REGISTRY', value=dbsafe_encode(registry))
697 u'theme.html.name': 'APP_TITLE',
698 u'theme.html.footer': 'USE_CUSTOM_FOOTER',
699 u'theme.html.sidebar': 'SIDEBAR_UPPER_TEXT',
700 u'theme.html.sidebar-low': 'SIDEBAR_LOWER_TEXT',
701 u'theme.html.welcome': 'APP_INTRO',
702 u'theme.html.head': 'CUSTOM_HEAD',
703 u'theme.html.header': 'CUSTOM_HEADER'
# Coerce `html` to unicode and apply every replacement in html_codes, each
# entry being the argument tuple for one str.replace() call.
# NOTE(review): the end of this function (its return) is missing from this
# excerpt.
714 def html_decode(html):
715 html = force_unicode(html)
717 for args in html_codes:
718 html = html.replace(*args)
# Import the "ThemeTextResources" table of the dump as KeyValue settings.
# Names present in sx2osqa_set_map are stored under the mapped key; other
# names are collected and stored together under 'SXIMPORT_UNKNOWN_SETS'.
# NOTE(review): some lines of this function are missing from this excerpt.
# The loop variable `set` shadows the builtin of the same name.
723 def static_import(dump):
724 sx_sets = readTable(dump, "ThemeTextResources")
728 if unicode(set['name']) in sx2osqa_set_map:
730 key = sx2osqa_set_map[set['name']],
731 value = dbsafe_encode(html_decode(set['value']))
736 sx_unknown[set['name']] = html_decode(set['value'])
738 unknown = orm.KeyValue(key='SXIMPORT_UNKNOWN_SETS', value=dbsafe_encode(sx_unknown))
def reset_sequences():
    """Run the ``PG_SEQUENCE_RESETS`` statements inside one transaction.

    Nothing is executed unless South reports the "postgres" backend.
    """
    from south.db import db

    if db.backend_name != "postgres":
        return

    db.start_transaction()
    db.execute_many(PG_SEQUENCE_RESETS)
    db.commit_transaction()
# Entry point of the import: takes the dump and the options mapping.
# NOTE(review): in the visible lines every import stage listed below is
# commented out, leaving only the South transaction commit. Some lines of
# this function are missing from this excerpt, so other stages may still run.
749 def sximport(dump, options):
750 #uidmap, merged_users = userimport(dump, options)
751 #tagmap = tagsimport(dump, uidmap)
752 #posts = postimport(dump, uidmap, tagmap)
753 #posts, comments = comment_import(dump, uidmap, posts)
754 #add_tags_to_posts(posts, tagmap)
755 #post_vote_import(dump, uidmap, posts)
756 #comment_vote_import(dump, uidmap, comments, posts)
757 #badges_import(dump, uidmap, posts.values())
762 from south.db import db
763 db.commit_transaction()
768 PG_SEQUENCE_RESETS = """
769 SELECT setval('"auth_user_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user";
770 SELECT setval('"auth_user_groups_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_groups";
771 SELECT setval('"auth_user_user_permissions_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_user_permissions";
772 SELECT setval('"forum_keyvalue_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_keyvalue";
773 SELECT setval('"forum_action_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_action";
774 SELECT setval('"forum_actionrepute_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_actionrepute";
775 SELECT setval('"forum_subscriptionsettings_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_subscriptionsettings";
776 SELECT setval('"forum_validationhash_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_validationhash";
777 SELECT setval('"forum_authkeyuserassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_authkeyuserassociation";
778 SELECT setval('"forum_tag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_tag";
779 SELECT setval('"forum_markedtag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_markedtag";
780 SELECT setval('"forum_node_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node";
781 SELECT setval('"forum_nodestate_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_nodestate";
782 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
783 SELECT setval('"forum_noderevision_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_noderevision";
784 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
785 SELECT setval('"forum_questionsubscription_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_questionsubscription";
786 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
787 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
788 SELECT setval('"forum_vote_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_vote";
789 SELECT setval('"forum_flag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_flag";
790 SELECT setval('"forum_badge_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_badge";
791 SELECT setval('"forum_award_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_award";
792 SELECT setval('"forum_openidnonce_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidnonce";
793 SELECT setval('"forum_openidassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidassociation";