1 # -*- coding: utf-8 -*-
\r
3 from xml.dom import minidom
\r
4 from datetime import datetime
\r
7 from django.utils.translation import ugettext as _
\r
8 from django.template.defaultfilters import slugify
\r
13 for node in el.childNodes:
\r
14 if node.nodeType == node.TEXT_NODE:
\r
18 msstrip = re.compile(r'^(.*)\.\d+')
\r
20 noms = msstrip.match(ts)
\r
24 return datetime(*time.strptime(ts, '%Y-%m-%dT%H:%M:%S')[0:6])
\r
27 return dict([(n.tagName.lower(), getText(n)) for n in el.childNodes if n.nodeType == el.ELEMENT_NODE])
\r
def readTable(dump, name):
    """Load one table from the dump and return its rows as a list.

    ``dump.read`` is asked for the member called ``"<name>.xml"``; the
    result is parsed with ``minidom`` and every ``<row>`` element found in
    the document is passed through ``readEl``.

    dump -- object exposing ``read(member_name)``
            (presumably a zip archive of the export; confirm with the caller)
    name -- table name without the ``.xml`` extension, e.g. ``"Users"``

    Returns a list with one ``readEl`` result per ``<row>`` element, in
    document order.
    """
    xml_source = dump.read("%s.xml" % name)
    document = minidom.parseString(xml_source)
    row_elements = document.getElementsByTagName('row')
    return [readEl(row) for row in row_elements]
\r
32 class UnknownUser(object):
\r
35 UnknownUser.counter += 1
\r
36 self.number = UnknownUser.counter
\r
39 return _("Unknown user %(number)d") % {'number': self.number}
\r
41 def __unicode__(self):
\r
42 return self.__str__()
\r
44 def encode(self, *args):
\r
45 return self.__str__()
\r
47 class IdMapper(dict):
\r
48 def __getitem__(self, key):
\r
50 return super(IdMapper, self).get(key, 1)
\r
52 def __setitem__(self, key, value):
\r
53 super(IdMapper, self).__setitem__(int(key), int(value))
\r
# Matches strings that begin with an "http://" or "https://" scheme.
# Written as a raw string without the redundant "\:" and "\/" escapes: those
# are invalid string escapes in a normal literal and trigger warnings on
# modern Python, while ":" and "/" need no escaping in a regular expression.
openidre = re.compile(r'^https?://')
\r
56 def userimport(dump, options):
\r
57 users = readTable(dump, "Users")
\r
60 uidmapper = IdMapper()
\r
63 owneruid = options.get('owneruid', None)
\r
64 #check for empty values
\r
71 if sxu.get('id') == '-1':
\r
74 if int(sxu.get('id')) == int(owneruid):
\r
75 osqau = orm.User.objects.get(id=1)
\r
76 uidmapper[owneruid] = 1
\r
80 username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', UnknownUser())))
\r
82 if not isinstance(username, UnknownUser) and username in user_by_name:
\r
83 #if options.get('mergesimilar', False) and sxu.get('email', 'INVALID') == user_by_name[username].email:
\r
84 # osqau = user_by_name[username]
\r
86 # uidmapper[sxu.get('id')] = osqau.id
\r
89 while ("%s %d" % (username, inc)) in user_by_name:
\r
92 username = "%s %d" % (username, inc)
\r
94 sxbadges = sxu.get('badgesummary', None)
\r
95 badges = {'1':'0','2':'0','3':'0'}
\r
98 badges.update(dict([b.split('=') for b in sxbadges.split()]))
\r
102 id = sxu.get('id'),
\r
103 username = unicode(username),
\r
105 email = sxu.get('email', ''),
\r
106 is_superuser = sxu.get('usertypeid') == '5',
\r
107 is_staff = sxu.get('usertypeid') == '4',
\r
109 date_joined = readTime(sxu.get('creationdate')),
\r
110 about = sxu.get('aboutme', ''),
\r
111 date_of_birth = sxu.get('birthday', None) and readTime(sxu['birthday']) or None,
\r
112 email_isvalid = int(sxu.get('usertypeid')) > 2,
\r
113 website = sxu.get('websiteurl', ''),
\r
114 reputation = int(sxu.get('reputation')),
\r
115 gold = int(badges['1']),
\r
116 silver = int(badges['2']),
\r
117 bronze = int(badges['3']),
\r
118 real_name = sxu.get('realname', ''),
\r
123 s = orm.SubscriptionSettings(user=osqau)
\r
126 uidmapper[osqau.id] = osqau.id
\r
128 new_about = sxu.get('aboutme', None)
\r
129 if new_about and osqau.about != new_about:
\r
131 osqau.about = "%s\n|\n%s" % (osqau.about, new_about)
\r
133 osqau.about = new_about
\r
135 osqau.username = sxu.get('displayname', sxu.get('displaynamecleaned', sxu.get('realname', UnknownUser())))
\r
136 osqau.email = sxu.get('email', '')
\r
137 osqau.reputation += int(sxu.get('reputation'))
\r
138 osqau.gold += int(badges['1'])
\r
139 osqau.silver += int(badges['2'])
\r
140 osqau.bronze += int(badges['3'])
\r
142 merged_users.append(osqau.id)
\r
145 user_by_name[osqau.username] = osqau
\r
147 openid = sxu.get('openid', None)
\r
148 if openid and openidre.match(openid):
\r
149 assoc = orm.AuthKeyUserAssociation(user=osqau, key=openid, provider="openidurl")
\r
152 if uidmapper[-1] == -1:
\r
155 return (uidmapper, merged_users)
\r
157 def tagsimport(dump, uidmap):
\r
158 tags = readTable(dump, "Tags")
\r
164 id = int(sxtag['id']),
\r
165 name = sxtag['name'],
\r
166 used_count = int(sxtag['count']),
\r
167 created_by_id = uidmap[sxtag.get('userid', 1)],
\r
171 tagmap[otag.name] = otag
\r
175 def postimport(dump, uidmap, tagmap):
\r
180 for h in readTable(dump, "PostHistory"):
\r
181 if not history.get(h.get('postid'), None):
\r
182 history[h.get('postid')] = []
\r
184 history[h.get('postid')].append(h)
\r
186 posts = readTable(dump, "Posts")
\r
188 for sxpost in posts:
\r
189 postclass = sxpost.get('posttypeid') == '1' and orm.Question or orm.Answer
\r
193 added_at = readTime(sxpost['creationdate']),
\r
194 body = sxpost['body'],
\r
195 score = sxpost.get('score', 0),
\r
197 vote_down_count = 0
\r
200 if sxpost.get('deletiondate', None):
\r
201 post.deleted = True
\r
202 post.deleted_at = readTime(sxpost['deletiondate'])
\r
205 post.author_id = uidmap[sxpost['owneruserid']]
\r
207 if sxpost.get('lasteditoruserid', None):
\r
208 post.last_edited_by_id = uidmap[sxpost.get('lasteditoruserid')]
\r
209 post.last_edited_at = readTime(sxpost['lasteditdate'])
\r
211 if sxpost.get('communityowneddate', None):
\r
213 post.wikified_at = readTime(sxpost['communityowneddate'])
\r
215 if sxpost.get('posttypeid') == '1': #question
\r
216 post.node_type = "question"
\r
217 post.title = sxpost['title']
\r
219 tagnames = sxpost['tags'].replace(u'ö', '-').replace(u'é', '').replace(u'à', '')
\r
220 post.tagnames = tagnames
\r
222 post.view_count = sxpost.get('viewcount', 0)
\r
223 post.favourite_count = sxpost.get('favoritecount', 0)
\r
224 post.answer_count = sxpost.get('answercount', 0)
\r
226 if sxpost.get('lastactivityuserid', None):
\r
227 post.last_activity_by_id = uidmap[sxpost['lastactivityuserid']]
\r
228 post.last_activity_at = readTime(sxpost['lastactivitydate'])
\r
230 if sxpost.get('closeddate', None):
\r
232 post.closed_by_id = 1
\r
233 post.closed_at = datetime.now()
\r
235 if sxpost.get('acceptedanswerid', None):
\r
236 post.accepted_answer_id = int(sxpost.get('acceptedanswerid'))
\r
237 accepted[int(sxpost.get('acceptedanswerid'))] = post
\r
240 post.node_type = "answer"
\r
241 post.parent_id = sxpost['parentid']
\r
243 if int(post.id) in accepted:
\r
244 post.accepted = True
\r
245 post.accepted_at = datetime.now()
\r
246 post.accepted_by_id = accepted[int(post.id)].author_id
\r
248 all[int(post.id)] = post
\r
252 def comment_import(dump, uidmap, posts):
\r
253 comments = readTable(dump, "PostComments")
\r
254 currid = max(posts.keys())
\r
257 for sxc in comments:
\r
261 node_type = "comment",
\r
262 added_at = readTime(sxc['creationdate']),
\r
263 author_id = uidmap[sxc.get('userid', 1)],
\r
264 body = sxc['text'],
\r
265 parent_id = sxc.get('postid'),
\r
267 vote_down_count = 0
\r
270 if sxc.get('deletiondate', None):
\r
272 oc.deleted_at = readTime(sxc['deletiondate'])
\r
273 oc.deleted_by_id = uidmap[sxc['deletionuserid']]
\r
274 oc.author_id = uidmap[sxc['deletionuserid']]
\r
276 oc.author_id = uidmap[sxc.get('userid', 1)]
\r
280 mapping[int(sxc['id'])] = int(oc.id)
\r
282 return posts, mapping
\r
285 def save_posts(posts, tagmap):
\r
286 for post in posts.values():
\r
289 if post.node_type == "question":
\r
290 tags = filter(lambda t: t is not None, [tagmap.get(n, None) for n in post.tagnames.split()])
\r
291 post.tagnames = " ".join([t.name for t in tags]).strip()
\r
294 create_and_activate_revision(post)
\r
297 def create_and_activate_revision(post):
\r
298 rev = orm.NodeRevision(
\r
299 author_id = post.author_id,
\r
302 revised_at = post.added_at,
\r
304 summary = 'Initial revision',
\r
305 tagnames = post.tagnames,
\r
306 title = post.title,
\r
310 post.active_revision_id = rev.id
\r
314 def post_vote_import(dump, uidmap, posts):
\r
315 votes = readTable(dump, "Posts2Votes")
\r
318 if sxv['votetypeid'] in ('2', '3'):
\r
320 node_id = sxv['postid'],
\r
321 user_id = uidmap[sxv['userid']],
\r
322 voted_at = readTime(sxv['creationdate']),
\r
323 vote = sxv['votetypeid'] == '2' and 1 or -1,
\r
326 if sxv['votetypeid'] == '2':
\r
327 posts[int(sxv['postid'])].vote_up_count += 1
\r
329 posts[int(sxv['postid'])].vote_down_count += 1
\r
333 def comment_vote_import(dump, uidmap, comments, posts):
\r
334 votes = readTable(dump, "Comments2Votes")
\r
337 if sxv['votetypeid'] in ('2', '3'):
\r
339 node_id = comments[int(sxv['postcommentid'])],
\r
340 user_id = uidmap[sxv['userid']],
\r
341 voted_at = readTime(sxv['creationdate']),
\r
342 vote = sxv['votetypeid'] == '2' and 1 or -1,
\r
345 if sxv['votetypeid'] == '2':
\r
346 posts[comments[int(sxv['postcommentid'])]].vote_up_count += 1
\r
348 posts[comments[int(sxv['postcommentid'])]].vote_down_count += 1
\r
354 def badges_import(dump, uidmap):
\r
355 node_ctype = orm['contenttypes.contenttype'].objects.get(name='node')
\r
356 obadges = dict([(b.slug, b) for b in orm.Badge.objects.all()])
\r
357 sxbadges = dict([(int(b['id']), b) for b in readTable(dump, "Badges")])
\r
361 for id, sxb in sxbadges.items():
\r
362 slug = slugify(sxb['name'].replace('&', 'and'))
\r
363 if slug in obadges:
\r
364 sx_to_osqa[id] = obadges[slug]
\r
367 name = sxb['name'],
\r
368 slug = slugify(sxb['name']),
\r
369 description = sxb['description'],
\r
370 multiple = sxb.get('single', 'false') == 'false',
\r
372 type = sxb['class']
\r
375 sx_to_osqa[id] = osqab
\r
377 sxawards = readTable(dump, "Users2Badges")
\r
380 for sxa in sxawards:
\r
381 badge = sx_to_osqa[int(sxa['badgeid'])]
\r
383 user_id = uidmap[sxa['userid']],
\r
385 content_type = node_ctype,
\r
389 osqaawards.append(osqaa)
\r
390 badge.awarded_count += 1
\r
392 for b in sx_to_osqa.values():
\r
395 for a in osqaawards:
\r
def reset_sequences():
    """Execute the PG_SEQUENCE_RESETS statements on a PostgreSQL backend.

    The South ``db`` object is imported lazily inside the function. When
    its ``backend_name`` is anything other than ``"postgres"`` the function
    returns without touching the database.
    """
    from south.db import db

    if db.backend_name != "postgres":
        return

    # Run the whole batch between an explicit transaction start and commit.
    db.start_transaction()
    db.execute_many(PG_SEQUENCE_RESETS)
    db.commit_transaction()
\r
406 def sximport(dump, options):
\r
407 uidmap, merged_users = userimport(dump, options)
\r
408 tagmap = tagsimport(dump, uidmap)
\r
409 posts = postimport(dump, uidmap, tagmap)
\r
410 posts, comments = comment_import(dump, uidmap, posts)
\r
411 save_posts(posts, tagmap)
\r
412 post_vote_import(dump, uidmap, posts)
\r
413 comment_vote_import(dump, uidmap, comments, posts)
\r
414 for post in posts.values():
\r
416 badges_import(dump, uidmap)
\r
418 from south.db import db
\r
419 db.commit_transaction()
\r
425 PG_SEQUENCE_RESETS = """
\r
426 SELECT setval('"auth_user_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user";
\r
427 SELECT setval('"auth_user_groups_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_groups";
\r
428 SELECT setval('"auth_user_user_permissions_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_user_permissions";
\r
429 SELECT setval('"activity_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "activity";
\r
430 SELECT setval('"forum_subscriptionsettings_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_subscriptionsettings";
\r
431 SELECT setval('"forum_validationhash_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_validationhash";
\r
432 SELECT setval('"forum_authkeyuserassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_authkeyuserassociation";
\r
433 SELECT setval('"tag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "tag";
\r
434 SELECT setval('"forum_markedtag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_markedtag";
\r
435 SELECT setval('"forum_node_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node";
\r
436 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
437 SELECT setval('"forum_noderevision_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_noderevision";
\r
438 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
439 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
440 SELECT setval('"favorite_question_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "favorite_question";
\r
441 SELECT setval('"forum_questionsubscription_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_questionsubscription";
\r
442 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
443 SELECT setval('"vote_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "vote";
\r
444 SELECT setval('"flagged_item_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "flagged_item";
\r
445 SELECT setval('"badge_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "badge";
\r
446 SELECT setval('"award_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "award";
\r
447 SELECT setval('"repute_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "repute";
\r
448 SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
\r
449 SELECT setval('"forum_keyvalue_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_keyvalue";
\r
450 SELECT setval('"forum_openidnonce_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidnonce";
\r
451 SELECT setval('"forum_openidassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidassociation";
\r