1 import os, tarfile, datetime, ConfigParser, logging
3 from django.utils.translation import ugettext as _
4 from django.core.cache import cache
6 from south.db import db
8 from xml.sax import make_parser
9 from xml.sax.handler import ContentHandler, ErrorHandler
11 from forum.templatetags.extra_tags import diff_date
13 from exporter import TMP_FOLDER, DATETIME_FORMAT, DATE_FORMAT, META_INF_SECTION, CACHE_KEY
19 class ContentElement():
20 def __init__(self, content):
21 self._content = content
24 return self._content.strip()
27 return self.content() == "true"
29 def as_date(self, default=NO_DEFAULT):
31 return datetime.datetime.strptime(self.content(), DATE_FORMAT)
33 if default == NO_DEFAULT:
34 return datetime.date.fromtimestamp(0)
39 def as_datetime(self, default=NO_DEFAULT):
41 return datetime.datetime.strptime(self.content(), DATETIME_FORMAT)
43 if default == NO_DEFAULT:
44 return datetime.datetime.fromtimestamp(0)
48 def as_int(self, default=0):
50 return int(self.content())
58 class RowElement(ContentElement):
59 def __init__(self, name, attrs, parent=None):
60 self.name = name.lower()
62 self.attrs = dict([(k.lower(), ContentElement(v)) for k, v in attrs.items()])
64 self.sub_elements = {}
def add_to_content(self, ch):
    """Append a chunk of character data to this element's raw content.

    ``ch`` is converted with ``unicode()`` before being appended, so the
    accumulated ``_content`` grows by one chunk per call.
    """
    chunk = unicode(ch)
    self._content += chunk
73 curr = self.sub_elements.get(sub.name, None)
77 self.sub_elements[sub.name] = curr
def get(self, name, default=None):
    """Return the last sub-element registered under ``name``.

    The lookup key is ``name`` lower-cased. When no sub-element list exists
    for that key, ``default`` is returned instead.
    """
    key = name.lower()
    if key in self.sub_elements:
        # Several children may share a tag name; the last one added wins.
        return self.sub_elements[key][-1]
    return default
def get_list(self, name):
    """Return every sub-element registered under ``name``.

    The lookup key is ``name`` lower-cased. A new empty list is returned
    when nothing is registered under that key.
    """
    key = name.lower()
    if key in self.sub_elements:
        return self.sub_elements[key]
    return []
def get_listc(self, name):
    """Return the ``content()`` of every sub-element named ``name``.

    The elements come from ``get_list(name)``, in the same order; the result
    is an empty list when there are none.
    """
    contents = []
    for element in self.get_list(name):
        contents.append(element.content())
    return contents
90 def getc(self, name, default=""):
91 el = self.get(name, None)
98 def get_attr(self, name, default=""):
99 return self.attrs.get(name.lower(), default)
101 def as_pickled(self, default=None):
102 value_el = self.get('value')
105 return value_el._as_pickled(default)
109 TYPES_MAP = dict([(c.__name__, c) for c in (int, long, str, unicode, float)])
111 def _as_pickled(self, default=None):
112 type = self.get_attr('type').content()
116 return dict([ (item.get_attr('key'), item.as_pickled()) for item in self.get_list('item') ])
118 return [item.as_pickled() for item in self.get_list('item')]
120 return self.content().lower() == 'true'
121 elif type in RowElement.TYPES_MAP:
122 return RowElement.TYPES_MAP[type](self.content())
124 return self.content()
131 class TableHandler(ContentHandler):
132 def __init__(self, root_name, row_name, callback, callback_args = [], ping = None):
133 self.root_name = root_name.lower()
134 self.row_name = row_name.lower()
135 self.callback = callback
136 self.callback_args = callback_args
142 self.curr_element = None
145 def startElement(self, name, attrs):
148 if name == self.root_name.lower():
150 elif name == self.row_name:
151 self.curr_element = RowElement(name, attrs)
153 self.curr_element = RowElement(name, attrs, self.curr_element)
def characters(self, ch):
    """Forward character data to the element currently being built.

    Data received while there is no current element is dropped.
    """
    if not self.curr_element:
        return
    self.curr_element.add_to_content(ch)
159 def endElement(self, name):
162 if name == self.root_name:
164 elif name == self.row_name:
165 self.callback(self.curr_element, *self.callback_args)
171 self.curr_element = self.curr_element.parent
174 class SaxErrorHandler(ErrorHandler):
178 def fatalError(self, e):
181 def warning(self, e):
def disable_triggers():
    """Execute ``commands.PG_DISABLE_TRIGGERS`` on a PostgreSQL backend.

    The commands run between ``db.start_transaction()`` and
    ``db.commit_transaction()``. Nothing happens when ``db.backend_name``
    is not "postgres".
    """
    if db.backend_name != "postgres":
        return

    db.start_transaction()
    db.execute_many(commands.PG_DISABLE_TRIGGERS)
    db.commit_transaction()
def enable_triggers():
    """Execute ``commands.PG_ENABLE_TRIGGERS`` on a PostgreSQL backend.

    The commands run between ``db.start_transaction()`` and
    ``db.commit_transaction()``. Nothing happens when ``db.backend_name``
    is not "postgres".
    """
    if db.backend_name != "postgres":
        return

    db.start_transaction()
    db.execute_many(commands.PG_ENABLE_TRIGGERS)
    db.commit_transaction()
def reset_sequences():
    """Execute ``commands.PG_SEQUENCE_RESETS`` on a PostgreSQL backend.

    The commands run between ``db.start_transaction()`` and
    ``db.commit_transaction()``. Nothing happens when ``db.backend_name``
    is not "postgres".
    """
    if db.backend_name != "postgres":
        return

    db.start_transaction()
    db.execute_many(commands.PG_SEQUENCE_RESETS)
    db.commit_transaction()
204 def start_import(fname, user):
206 start_time = datetime.datetime.now()
207 steps = [s for s in FILE_HANDLERS]
209 with open(os.path.join(TMP_FOLDER, 'backup.inf'), 'r') as inffile:
210 inf = ConfigParser.SafeConfigParser()
213 state = dict([(s['id'], {
214 'status': _('Queued'), 'count': int(inf.get(META_INF_SECTION, s['id'])), 'parsed': 0
215 }) for s in steps] + [
217 'status': _('Starting'), 'count': int(inf.get(META_INF_SECTION, 'overall')), 'parsed': 0
221 full_state = dict(running=True, state=state, time_started="")
224 full_state['time_started'] = diff_date(start_time)
225 cache.set(CACHE_KEY, full_state)
229 def ping_state(name):
230 state[name]['parsed'] += 1
231 state['overall']['parsed'] += 1
238 state['overall']['status'] = _('Importing %s') % s['name']
239 state[name]['status'] = _('Importing')
242 fn(TMP_FOLDER, user, ping)
244 state[name]['status'] = _('Done')
250 #dump = tarfile.open(fname, 'r')
251 #dump.extractall(TMP_FOLDER)
256 db.start_transaction()
258 for h in FILE_HANDLERS:
259 run(h['fn'], h['id'])
262 db.commit_transaction()
267 full_state['running'] = False
268 full_state['errors'] = "%s: %s" % (e.__class__.__name__, unicode(e))
272 logging.error("Error executing xml import: \n %s" % (traceback.format_exc()))
274 def file_handler(file_name, root_tag, el_tag, name, args_handler=None, pre_callback=None, post_callback=None):
276 def decorated(location, current_user, ping):
278 pre_callback(current_user)
281 args = args_handler(current_user)
285 parser = make_parser()
286 handler = TableHandler(root_tag, el_tag, fn, args, ping)
287 parser.setContentHandler(handler)
288 #parser.setErrorHandler(SaxErrorHandler())
290 parser.parse(os.path.join(location, file_name))
295 FILE_HANDLERS.append(dict(id=root_tag, name=name, fn=decorated))
300 @file_handler('users.xml', 'users', 'user', _('Users'), args_handler=lambda u: [u])
301 def user_import(row, current_user):
304 if str(current_user.id) == row.getc('id'):
307 roles = row.get('roles').get_listc('role')
308 valid_email = row.get('email').get_attr('validated').as_bool()
309 badges = row.get('badges')
313 username = row.getc('username'),
314 password = row.getc('password'),
315 email = row.getc('email'),
316 email_isvalid= valid_email,
317 is_superuser = 'superuser' in roles,
318 is_staff = 'moderator' in roles,
320 date_joined = row.get('joindate').as_datetime(),
321 about = row.getc('bio'),
322 date_of_birth = row.get('birthdate').as_date(None),
323 website = row.getc('website'),
324 reputation = row.get('reputation').as_int(),
325 gold = badges.get_attr('gold').as_int(),
326 silver = badges.get_attr('silver').as_int(),
327 bronze = badges.get_attr('bronze').as_int(),
328 real_name = row.getc('realname'),
329 location = row.getc('location'),
334 authKeys = row.get('authKeys')
336 for key in authKeys.get_list('key'):
337 orm.AuthKeyUserAssociation(user=user, key=key.getc('key'), provider=key.getc('provider')).save()
339 notifications = row.get('notifications')
341 attributes = dict([(str(k), v.as_bool() and 'i' or 'n') for k, v in notifications.get('notify').attrs.items()])
342 attributes.update(dict([(str(k), v.as_bool()) for k, v in notifications.get('autoSubscribe').attrs.items()]))
343 attributes.update(dict([(str("notify_%s" % k), v.as_bool()) for k, v in notifications.get('notifyOnSubscribed').attrs.items()]))
345 ss = orm.SubscriptionSettings(user=user, enable_notifications=notifications.get_attr('enabled').as_bool(), **attributes)
348 ss.id = current_user.subscription_settings.id
def pre_tag_import(user):
    """Reset ``tag_import.tag_mappings`` to an empty dict.

    Registered as the ``pre_callback`` of the tags file handler; ``user`` is
    accepted but not used.
    """
    setattr(tag_import, 'tag_mappings', dict())
357 @file_handler('tags.xml', 'tags', 'tag', _('Tags'), pre_callback=pre_tag_import)
359 tag = orm.Tag(name=row.getc('name'), used_count=row.get('used').as_int(), created_by_id=row.get('author').as_int())
361 tag_import.tag_mappings[tag.name] = tag
def post_node_import():
    """Clear ``tag_import.tag_mappings`` by setting it to ``None``.

    Registered as the ``post_callback`` of the nodes file handler.
    """
    setattr(tag_import, 'tag_mappings', None)
367 @file_handler('nodes.xml', 'nodes', 'node', _('Nodes'), args_handler=lambda u: [tag_import.tag_mappings], post_callback=post_node_import)
368 def node_import(row, tags):
372 for t in row.get('tags').get_list('tag'):
373 ntags.append(tags[t.content()])
375 last_act = row.get('lastactivity')
379 node_type = row.getc('type'),
380 author_id = row.get('author').as_int(),
381 added_at = row.get('date').as_datetime(),
382 parent_id = row.get('parent').as_int(None),
383 abs_parent_id = row.get('absparent').as_int(None),
384 score = row.get('score').as_int(0),
386 last_activity_by_id = last_act.get('by').as_int(None),
387 last_activity_at = last_act.get('at').as_datetime(None),
389 title = row.getc('title'),
390 body = row.getc('body'),
391 tagnames = " ".join([t.name for t in ntags]),
393 marked = row.get('marked').as_bool(),
394 extra_ref_id = row.get('extraRef').as_int(None),
395 extra_count = row.get('extraCount').as_int(0),
396 extra = row.get('extraData').as_pickled()
402 revisions = row.get('revisions')
403 active = revisions.get_attr('active').as_int()
405 for r in revisions.get_list('revision'):
406 rev = orm.NodeRevision(
407 author_id = r.getc('author'),
408 body = r.getc('body'),
410 revised_at = r.get('date').as_datetime(),
411 revision = r.get('number').as_int(),
412 summary = r.getc('summary'),
413 tagnames = " ".join(r.getc('tags').split(',')),
414 title = r.getc('title'),
418 if rev.revision == active:
421 node.active_revision = active
426 def post_action(*types):
433 def post_action_import_callback():
434 with_state = orm.Node.objects.filter(id__in=orm.NodeState.objects.values_list('node_id', flat=True).distinct())
437 n.state_string = "".join(["(%s)" % s for s in n.states.values_list('state_type')])
440 @file_handler('actions.xml', 'actions', 'action', _('Actions'), post_callback=post_action_import_callback)
441 def actions_import(row):
443 id = row.get('id').as_int(),
444 action_type = row.getc('type'),
445 action_date = row.get('date').as_datetime(),
446 node_id = row.get('node').as_int(None),
447 user_id = row.get('user').as_int(),
448 real_user_id = row.get('realUser').as_int(None),
450 extra = row.get('extraData').as_pickled(),
453 canceled = row.get('canceled')
454 if canceled.get_attr('state').as_bool():
455 action.canceled = True
456 action.canceled_by_id = canceled.get('user').as_int()
457 #action.canceled_at = canceled.get('date').as_datetime(),
458 action.canceled_ip = canceled.getc('ip')
462 for r in row.get('reputes').get_list('repute'):
463 by_canceled = r.get_attr('byCanceled').as_bool()
467 user_id = r.get('user').as_int(),
468 value = r.get('value').as_int(),
470 date = by_canceled and action.canceled_at or action.action_date,
471 by_canceled = by_canceled
474 if (not action.canceled) and (action.action_type in POST_ACTION):
475 POST_ACTION[action.action_type](row, action)
@post_action('voteup', 'votedown', 'voteupcomment')
def vote_action(row, action):
    """Create and save the ``orm.Vote`` that belongs to an imported vote action.

    The vote copies the action's user, node and date. Its value is -1 for a
    'votedown' action and 1 for any other action type. ``row`` is not used.
    """
    vote = orm.Vote(
        user_id=action.user_id,
        node_id=action.node_id,
        action=action,
        voted_at=action.action_date,
        value=1 if action.action_type != 'votedown' else -1,
    )
    vote.save()
485 def state_action(state):
489 node_id = action.node_id,
494 post_action('wikify')(state_action('wiki'))
495 post_action('delete')(state_action('deleted'))
496 post_action('acceptanswer')(state_action('accepted'))
497 post_action('publish')(state_action('published'))
def flag_action(row, action):
    """Create and save the ``orm.Flag`` that belongs to an imported action.

    The flag copies the action's user and node, and uses ``action.extra`` as
    its reason. ``row`` is not used.
    """
    flag = orm.Flag(
        user_id=action.user_id,
        node_id=action.node_id,
        action=action,
        reason=action.extra,
    )
    flag.save()
def award_import_args(user):
    """Build the extra callback arguments for the awards import.

    Returns a one-element list holding a dict that maps each badge's ``cls``
    attribute to the badge object, for every ``orm.Badge``. ``user`` is
    accepted but not used.
    """
    badges_by_cls = {}
    for badge in orm.Badge.objects.all():
        badges_by_cls[badge.cls] = badge
    return [badges_by_cls]
509 @file_handler('awards.xml', 'awards', 'award', _('Awards'), args_handler=award_import_args)
510 def awards_import(row, badges):
512 badge_type = badges.get(row.getc('badge'), None)
518 user_id = row.get('user').as_int(),
519 badge = badges[row.getc('badge')],
520 node_id = row.get('node').as_int(None),
521 action_id = row.get('action').as_int(None),
522 trigger_id = row.get('trigger').as_int(None)
@file_handler('settings.xml', 'settings', 'setting', _('Settings'))
def settings_import(row):
    """Import one ``<setting>`` row as an ``orm.KeyValue`` record.

    The key is the text of the row's ``key`` sub-element and the value is
    the unpickled form of its ``value`` sub-element.
    """
    setting = orm.KeyValue(
        key=row.getc('key'),
        value=row.get('value').as_pickled(),
    )
    # Constructing the model instance alone writes nothing; it has to be
    # saved, as the other import handlers in this file do for their records.
    # NOTE(review): if the target database can already hold a row for this
    # key, confirm whether that row should be updated instead.
    setting.save()