1 # -*- coding: utf-8 -*-
\r
3 from xml.dom import minidom
\r
4 from datetime import datetime
\r
7 from django.utils.translation import ugettext as _
\r
8 from django.template.defaultfilters import slugify
\r
13 for node in el.childNodes:
\r
14 if node.nodeType == node.TEXT_NODE:
\r
18 msstrip = re.compile(r'^(.*)\.\d+')
\r
20 noms = msstrip.match(ts)
\r
24 return datetime(*time.strptime(ts, '%Y-%m-%dT%H:%M:%S')[0:6])
\r
27 return dict([(n.tagName.lower(), getText(n)) for n in el.childNodes if n.nodeType == el.ELEMENT_NODE])
\r
def readTable(dump, name):
    """Parse ``<name>.xml`` out of *dump* and return its rows as a list.

    ``dump`` must provide ``read(member_name)`` returning the XML text of the
    named member (presumably a zip archive of the data dump -- confirm with
    the caller).  Each ``row`` element of the document is converted with
    ``readEl`` and the converted rows are returned in document order.
    """
    doc = minidom.parseString(dump.read("%s.xml" % name))
    try:
        return [readEl(row) for row in doc.getElementsByTagName('row')]
    finally:
        # Dump tables can be large: release the DOM tree as soon as the rows
        # have been extracted instead of waiting for the cycle collector.
        # NOTE(review): this assumes readEl returns plain values and keeps no
        # references to DOM nodes -- confirm against readEl/getText.
        doc.unlink()
\r
32 class UnknownUser(object):
\r
35 UnknownUser.counter += 1
\r
36 self.number = UnknownUser.counter
\r
39 return _("Unknown user %(number)d") % {'number': self.number}
\r
41 def __unicode__(self):
\r
42 return self.__str__()
\r
44 def encode(self, *args):
\r
45 return self.__str__()
\r
47 class IdMapper(dict):
\r
48 def __getitem__(self, key):
\r
50 return super(IdMapper, self).get(key, key)
\r
52 def __setitem__(self, key, value):
\r
53 super(IdMapper, self).__setitem__(int(key), int(value))
\r
# Matches values that start with an http:// or https:// scheme; used to decide
# whether a user's stored OpenID looks like a URL.  Written as a raw string
# without the redundant ``\:`` / ``\/`` escapes, which are invalid string
# escape sequences and trigger warnings on newer Python versions.
openidre = re.compile(r'^https?://')
\r
56 def userimport(dump, options):
\r
57 users = readTable(dump, "Users")
\r
60 uidmapper = IdMapper()
\r
63 owneruid = options.get('owneruid', None)
\r
64 #check for empty values
\r
71 if sxu.get('id') == '-1':
\r
74 if int(sxu.get('id')) == int(owneruid):
\r
75 osqau = orm.User.objects.get(id=1)
\r
76 uidmapper[owneruid] = 1
\r
80 username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', UnknownUser())))
\r
82 if not isinstance(username, UnknownUser) and username in user_by_name:
\r
83 if options.get('mergesimilar', False) and sxu.get('email', 'INVALID') == user_by_name[username].email:
\r
84 osqau = user_by_name[username]
\r
86 uidmapper[sxu.get('id')] = osqau.id
\r
89 while ("%s %d" % (username, inc)) in user_by_name:
\r
92 username = "%s %d" % (username, inc)
\r
94 sxbadges = sxu.get('badgesummary', None)
\r
95 badges = {'1':'0','2':'0','3':'0'}
\r
98 badges.update(dict([b.split('=') for b in sxbadges.split()]))
\r
102 id = sxu.get('id'),
\r
103 username = unicode(username),
\r
105 email = sxu.get('email', ''),
\r
106 is_superuser = sxu.get('usertypeid') == '5',
\r
107 is_staff = sxu.get('usertypeid') == '4',
\r
109 date_joined = readTime(sxu.get('creationdate')),
\r
110 about = sxu.get('aboutme', ''),
\r
111 date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None,
\r
112 email_isvalid = int(sxu.get('usertypeid')) > 2,
\r
113 website = sxu.get('websiteurl', ''),
\r
114 reputation = int(sxu.get('reputation')),
\r
115 gold = int(badges['1']),
\r
116 silver = int(badges['2']),
\r
117 bronze = int(badges['3']),
\r
118 real_name = sxu.get('realname', ''),
\r
123 s = orm.SubscriptionSettings(user=osqau)
\r
126 user_by_name[osqau.username] = osqau
\r
128 new_about = sxu.get('aboutme', None)
\r
129 if new_about and osqau.about != new_about:
\r
131 osqau.about = "%s\n|\n%s" % (osqau.about, new_about)
\r
133 osqau.about = new_about
\r
135 osqau.username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', UnknownUser())))
\r
136 osqau.email = sxu.get('email', '')
\r
137 osqau.reputation += int(sxu.get('reputation'))
\r
138 osqau.gold += int(badges['1'])
\r
139 osqau.silver += int(badges['2'])
\r
140 osqau.bronze += int(badges['3'])
\r
142 merged_users.append(osqau.id)
\r
146 openid = sxu.get('openid', None)
\r
147 if openid and openidre.match(openid):
\r
148 assoc = orm.AuthKeyUserAssociation(user=osqau, key=openid, provider="openidurl")
\r
151 if uidmapper[-1] == -1:
\r
154 return (uidmapper, merged_users)
\r
156 def tagsimport(dump, uidmap):
\r
157 tags = readTable(dump, "Tags")
\r
163 id = int(sxtag['id']),
\r
164 name = sxtag['name'],
\r
165 used_count = int(sxtag['count']),
\r
166 created_by_id = uidmap[sxtag.get('userid', 1)],
\r
170 tagmap[otag.name] = otag
\r
174 def postimport(dump, uidmap, tagmap):
\r
179 for h in readTable(dump, "PostHistory"):
\r
180 if not history.get(h.get('postid'), None):
\r
181 history[h.get('postid')] = []
\r
183 history[h.get('postid')].append(h)
\r
185 posts = readTable(dump, "Posts")
\r
187 for sxpost in posts:
\r
188 postclass = sxpost.get('posttypeid') == '1' and orm.Question or orm.Answer
\r
192 added_at = readTime(sxpost['creationdate']),
\r
193 body = sxpost['body'],
\r
194 score = sxpost.get('score', 0),
\r
196 vote_down_count = 0
\r
199 if sxpost.get('deletiondate', None):
\r
200 post.deleted = True
\r
201 post.deleted_at = readTime(sxpost['deletiondate'])
\r
204 post.author_id = uidmap[sxpost['owneruserid']]
\r
206 if sxpost.get('lasteditoruserid', None):
\r
207 post.last_edited_by_id = uidmap[sxpost.get('lasteditoruserid')]
\r
208 post.last_edited_at = readTime(sxpost['lasteditdate'])
\r
210 if sxpost.get('communityowneddate', None):
\r
212 post.wikified_at = readTime(sxpost['communityowneddate'])
\r
214 if sxpost.get('posttypeid') == '1': #question
\r
215 post.node_type = "question"
\r
216 post.title = sxpost['title']
\r
218 tagnames = sxpost['tags'].replace(u'ö', '-').replace(u'é', '').replace(u'à', '')
\r
219 post.tagnames = tagnames
\r
221 post.view_count = sxpost.get('viewcount', 0)
\r
222 post.favourite_count = sxpost.get('favoritecount', 0)
\r
223 post.answer_count = sxpost.get('answercount', 0)
\r
225 if sxpost.get('lastactivityuserid', None):
\r
226 post.last_activity_by_id = uidmap[sxpost['lastactivityuserid']]
\r
227 post.last_activity_at = readTime(sxpost['lastactivitydate'])
\r
229 if sxpost.get('closeddate', None):
\r
231 post.closed_by_id = 1
\r
232 post.closed_at = datetime.now()
\r
234 if sxpost.get('acceptedanswerid', None):
\r
235 post.accepted_answer_id = int(sxpost.get('acceptedanswerid'))
\r
236 accepted[int(sxpost.get('acceptedanswerid'))] = post
\r
239 post.node_type = "answer"
\r
240 post.parent_id = sxpost['parentid']
\r
242 if int(post.id) in accepted:
\r
243 post.accepted = True
\r
244 post.accepted_at = datetime.now()
\r
245 post.accepted_by_id = accepted[int(post.id)].author_id
\r
247 all[int(post.id)] = post
\r
251 def comment_import(dump, uidmap, posts):
\r
252 comments = readTable(dump, "PostComments")
\r
253 currid = max(posts.keys())
\r
256 for sxc in comments:
\r
260 node_type = "comment",
\r
261 added_at = readTime(sxc['creationdate']),
\r
262 author_id = uidmap[sxc['userid']],
\r
263 body = sxc['text'],
\r
264 parent_id = sxc.get('postid'),
\r
266 vote_down_count = 0
\r
269 if sxc.get('deletiondate', None):
\r
271 oc.deleted_at = readTime(sxc['deletiondate'])
\r
272 oc.deleted_by_id = uidmap[sxc['deletionuserid']]
\r
273 oc.author_id = uidmap[sxc['deletionuserid']]
\r
275 oc.author_id = uidmap[sxc['userid']]
\r
279 mapping[int(sxc['id'])] = int(oc.id)
\r
281 return posts, mapping
\r
284 def save_posts(posts, tagmap):
\r
285 for post in posts.values():
\r
288 if post.node_type == "question":
\r
289 tags = filter(lambda t: t is not None, [tagmap.get(n, None) for n in post.tagnames.split()])
\r
290 post.tagnames = " ".join([t.name for t in tags]).strip()
\r
293 create_and_activate_revision(post)
\r
296 def create_and_activate_revision(post):
\r
297 rev = orm.NodeRevision(
\r
298 author_id = post.author_id,
\r
301 revised_at = post.added_at,
\r
303 summary = 'Initial revision',
\r
304 tagnames = post.tagnames,
\r
305 title = post.title,
\r
309 post.active_revision_id = rev.id
\r
313 def post_vote_import(dump, uidmap, posts):
\r
314 votes = readTable(dump, "Posts2Votes")
\r
317 if sxv['votetypeid'] in ('2', '3'):
\r
319 node_id = sxv['postid'],
\r
320 user_id = uidmap[sxv['userid']],
\r
321 voted_at = readTime(sxv['creationdate']),
\r
322 vote = sxv['votetypeid'] == '2' and 1 or -1,
\r
325 if sxv['votetypeid'] == '2':
\r
326 posts[int(sxv['postid'])].vote_up_count += 1
\r
328 posts[int(sxv['postid'])].vote_down_count += 1
\r
332 def comment_vote_import(dump, uidmap, comments, posts):
\r
333 votes = readTable(dump, "Comments2Votes")
\r
336 if sxv['votetypeid'] in ('2', '3'):
\r
338 node_id = comments[int(sxv['postcommentid'])],
\r
339 user_id = uidmap[sxv['userid']],
\r
340 voted_at = readTime(sxv['creationdate']),
\r
341 vote = sxv['votetypeid'] == '2' and 1 or -1,
\r
344 if sxv['votetypeid'] == '2':
\r
345 posts[comments[int(sxv['postcommentid'])]].vote_up_count += 1
\r
347 posts[comments[int(sxv['postcommentid'])]].vote_down_count += 1
\r
353 def badges_import(dump, uidmap):
\r
354 node_ctype = orm['contenttypes.contenttype'].objects.get(name='node')
\r
355 obadges = dict([(b.slug, b) for b in orm.Badge.objects.all()])
\r
356 sxbadges = dict([(int(b['id']), b) for b in readTable(dump, "Badges")])
\r
360 for id, sxb in sxbadges.items():
\r
361 slug = slugify(sxb['name'].replace('&', 'and'))
\r
362 if slug in obadges:
\r
363 sx_to_osqa[id] = obadges[slug]
\r
366 name = sxb['name'],
\r
367 slug = slugify(sxb['name']),
\r
368 description = sxb['description'],
\r
369 multiple = sxb.get('single', 'false') == 'false',
\r
371 type = sxb['class']
\r
374 sx_to_osqa[id] = osqab
\r
376 sxawards = readTable(dump, "Users2Badges")
\r
379 for sxa in sxawards:
\r
380 badge = sx_to_osqa[int(sxa['badgeid'])]
\r
382 user_id = uidmap[sxa['userid']],
\r
384 content_type = node_ctype,
\r
388 osqaawards.append(osqaa)
\r
389 badge.awarded_count += 1
\r
391 for b in sx_to_osqa.values():
\r
394 for a in osqaawards:
\r
def reset_sequences():
    """Run the ``PG_SEQUENCE_RESETS`` statements when the backend is postgres.

    Does nothing for any other South backend.  The statements are executed
    inside a single transaction; if executing them fails the transaction is
    rolled back and the original exception is re-raised, so the connection is
    not left inside a half-finished transaction.
    """
    # Imported locally, as in the rest of this module, so that South is only
    # required when the import is actually run.
    from south.db import db

    if db.backend_name != "postgres":
        return

    db.start_transaction()
    try:
        db.execute_many(PG_SEQUENCE_RESETS)
    except Exception:
        db.rollback_transaction()
        raise
    db.commit_transaction()
\r
405 def sximport(dump, options):
\r
406 uidmap, merged_users = userimport(dump, options)
\r
407 tagmap = tagsimport(dump, uidmap)
\r
408 posts = postimport(dump, uidmap, tagmap)
\r
409 posts, comments = comment_import(dump, uidmap, posts)
\r
410 save_posts(posts, tagmap)
\r
411 post_vote_import(dump, uidmap, posts)
\r
412 comment_vote_import(dump, uidmap, comments, posts)
\r
413 for post in posts.values():
\r
415 badges_import(dump, uidmap)
\r
417 from south.db import db
\r
418 db.commit_transaction()
\r
424 PG_SEQUENCE_RESETS = """
\r
425 SELECT setval('"auth_user_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user";
\r
426 SELECT setval('"auth_user_groups_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_groups";
\r
427 SELECT setval('"auth_user_user_permissions_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_user_permissions";
\r
428 SELECT setval('"activity_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "activity";
\r
429 SELECT setval('"forum_subscriptionsettings_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_subscriptionsettings";
\r
430 SELECT setval('"forum_validationhash_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_validationhash";
\r
431 SELECT setval('"forum_authkeyuserassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_authkeyuserassociation";
\r
432 SELECT setval('"tag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "tag";
\r
433 SELECT setval('"forum_markedtag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_markedtag";
\r
434 SELECT setval('"forum_node_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node";
\r
435 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
436 SELECT setval('"forum_noderevision_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_noderevision";
\r
437 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
438 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
439 SELECT setval('"favorite_question_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "favorite_question";
\r
440 SELECT setval('"forum_questionsubscription_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_questionsubscription";
\r
441 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
442 SELECT setval('"vote_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "vote";
\r
443 SELECT setval('"flagged_item_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "flagged_item";
\r
444 SELECT setval('"badge_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "badge";
\r
445 SELECT setval('"award_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "award";
\r
446 SELECT setval('"repute_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "repute";
\r
447 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
448 SELECT setval('"forum_keyvalue_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_keyvalue";
\r
449 SELECT setval('"forum_openidnonce_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidnonce";
\r
450 SELECT setval('"forum_openidassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidassociation";
\r