compute manager (1): init_host(self) (unfinished)


Initialize the host:
Steps:

  1. Load the PCI passthrough whitelist (CONF.pci.passthrough_whitelist). Per the in-code note, loading it at service start validates the configuration early, so a bad whitelist kills the service right away instead of failing later in the resource tracker.
  2. Check the placement configuration: if CONF.placement.os_region_name is unset, raise PlacementNotConfigured, because a compute node that cannot report its resources to the placement API cannot be chosen as a destination.
  3. Initialize the host's compute driver layer (self.driver.init_host).
  4. Fetch the list of instances on this host through the InstanceList class.
  5. Check whether iptables application is deferred (CONF.defer_iptables_apply); if so, turn deferred filter application on in the driver.
  6. Call init_virt_events (a sketch of what it roughly does follows the code below).
  7. Destroy any instances that were already evacuated to another host (_destroy_evacuated_instances).
  8. Call _init_instance on each remaining instance on the host; finally, turn deferred iptables application back off (if it was on) and push the instance list to the scheduler (_update_scheduler_instance_info).
def init_host(self):
    """Initialization for a standalone compute service."""
    if CONF.pci.passthrough_whitelist:
        # Simply loading the PCI passthrough whitelist will do a bunch of
        # validation that would otherwise wait until the PciDevTracker is
        # constructed when updating available resources for the compute
        # node(s) in the resource tracker, effectively killing that task.
        # So load up the whitelist when starting the compute service to
        # flush any invalid configuration early so we can kill the service
        # if the configuration is wrong.
        whitelist.Whitelist(CONF.pci.passthrough_whitelist)

    # NOTE(sbauza): We want the compute node to hard fail if it can't be
    # able to provide its resources to the placement API, or it would not
    # be able to be eligible as a destination.
    if CONF.placement.os_region_name is None:
        raise exception.PlacementNotConfigured()

    self.driver.init_host(host=self.host)
    context = nova.context.get_admin_context()
    instances = objects.InstanceList.get_by_host(
        context, self.host, expected_attrs=['info_cache', 'metadata'])

    if CONF.defer_iptables_apply:
        self.driver.filter_defer_apply_on()

    self.init_virt_events()

    try:
        # checking that instance was not already evacuated to other host
        self._destroy_evacuated_instances(context)
        for instance in instances:
            self._init_instance(context, instance)
    finally:
        if CONF.defer_iptables_apply:
            self.driver.filter_defer_apply_off()
        self._update_scheduler_instance_info(context, instances)
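The init_virt_events call from step 6 is not shown in this post. In this era of Nova it essentially registers the compute manager as a listener for asynchronous instance lifecycle events coming from the virt driver, gated by a workarounds option. The following is a simplified sketch, paraphrased rather than the verbatim source:

def init_virt_events(self):
    # Simplified sketch (assumption: paraphrased from this era of Nova,
    # not the verbatim source).
    if CONF.workarounds.handle_virt_lifecycle_events:
        # Register handle_events as the callback the virt driver invokes
        # when the hypervisor reports instance lifecycle changes
        # (started, stopped, paused, resumed, ...).
        self.driver.register_event_listener(self.handle_events)
    else:
        # With lifecycle events disabled, power-state reconciliation
        # relies on the periodic _sync_power_states task instead.
        pass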

_init_instance performs the following steps:
1. If the instance is not owned by this host (instance.host differs from self.host, i.e. it has been moved elsewhere), log a warning and return; otherwise continue.
2. If the instance is SOFT_DELETED, or is in the ERROR state without being in the middle of a resize migration or a deletion (task_state not RESIZE_MIGRATING or DELETING), log it and return; otherwise continue.
3. If the instance is in the DELETED state, try to complete the partial deletion (_complete_partial_deletion) and return.
The remaining branches (interrupted spawn/rebuild, snapshot and resize cleanup, pending deletions, reboots, power on/off retries, crashed migrations, and resume-on-host-boot) follow in the code below; a condensed summary sketch is given after the code.

def _init_instance(self, context, instance):
    '''Initialize this instance during service init.'''

    if instance.host != self.host:
        LOG.warning(_LW('Instance %(uuid)s appears to not be owned '
                        'by this host, but by %(host)s. Startup '
                        'processing is being skipped.'),
                    {'uuid': instance.uuid,
                     'host': instance.host})
        return

    if (instance.vm_state == vm_states.SOFT_DELETED or
        (instance.vm_state == vm_states.ERROR and
         instance.task_state not in
         (task_states.RESIZE_MIGRATING, task_states.DELETING))):
        LOG.debug("Instance is in %s state.",
                  instance.vm_state, instance=instance)
        return

    if instance.vm_state == vm_states.DELETED:
        try:
            self._complete_partial_deletion(context, instance)
        except Exception:
            # we don't want that an exception blocks the init_host
            msg = _LE('Failed to complete a deletion')
            LOG.exception(msg, instance=instance)
        return

    if (instance.vm_state == vm_states.BUILDING or
        instance.task_state in [task_states.SCHEDULING,
                                task_states.BLOCK_DEVICE_MAPPING,
                                task_states.NETWORKING,
                                task_states.SPAWNING]):
        # NOTE(dave-mcnally) compute stopped before instance was fully
        # spawned so set to ERROR state. This is safe to do as the state
        # may be set by the api but the host is not so if we get here the
        # instance has already been scheduled to this particular host.
        LOG.debug("Instance failed to spawn correctly, "
                  "setting to ERROR state", instance=instance)
        instance.task_state = None
        instance.vm_state = vm_states.ERROR
        instance.save()
        return

    if (instance.vm_state in [vm_states.ACTIVE, vm_states.STOPPED] and
        instance.task_state in [task_states.REBUILDING,
                                task_states.REBUILD_BLOCK_DEVICE_MAPPING,
                                task_states.REBUILD_SPAWNING]):
        # NOTE(jichenjc) compute stopped before instance was fully
        # spawned so set to ERROR state. This is consistent to BUILD
        LOG.debug("Instance failed to rebuild correctly, "
                  "setting to ERROR state", instance=instance)
        instance.task_state = None
        instance.vm_state = vm_states.ERROR
        instance.save()
        return

    if (instance.vm_state != vm_states.ERROR and
        instance.task_state in [task_states.IMAGE_SNAPSHOT_PENDING,
                                task_states.IMAGE_PENDING_UPLOAD,
                                task_states.IMAGE_UPLOADING,
                                task_states.IMAGE_SNAPSHOT]):
        LOG.debug("Instance in transitional state %s at start-up "
                  "clearing task state",
                  instance.task_state, instance=instance)
        try:
            self._post_interrupted_snapshot_cleanup(context, instance)
        except Exception:
            # we don't want that an exception blocks the init_host
            msg = _LE('Failed to cleanup snapshot.')
            LOG.exception(msg, instance=instance)
        instance.task_state = None
        instance.save()

    if (instance.vm_state != vm_states.ERROR and
        instance.task_state in [task_states.RESIZE_PREP]):
        LOG.debug("Instance in transitional state %s at start-up "
                  "clearing task state",
                  instance['task_state'], instance=instance)
        instance.task_state = None
        instance.save()

    if instance.task_state == task_states.DELETING:
        try:
            LOG.info(_LI('Service started deleting the instance during '
                         'the previous run, but did not finish. Restarting'
                         ' the deletion now.'), instance=instance)
            instance.obj_load_attr('metadata')
            instance.obj_load_attr('system_metadata')
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                    context, instance.uuid)
            project_id, user_id = objects.quotas.ids_from_instance(
                context, instance)
            quotas = self._create_reservations(context, instance,
                                               project_id, user_id)
            self._delete_instance(context, instance, bdms, quotas)
        except Exception:
            # we don't want that an exception blocks the init_host
            msg = _LE('Failed to complete a deletion')
            LOG.exception(msg, instance=instance)
            self._set_instance_obj_error_state(context, instance)
        return

    current_power_state = self._get_power_state(context, instance)
    try_reboot, reboot_type = self._retry_reboot(context, instance,
                                                 current_power_state)

    if try_reboot:
        LOG.debug("Instance in transitional state (%(task_state)s) at "
                  "start-up and power state is (%(power_state)s), "
                  "triggering reboot",
                  {'task_state': instance.task_state,
                   'power_state': current_power_state},
                  instance=instance)

        # NOTE(mikal): if the instance was doing a soft reboot that got as
        # far as shutting down the instance but not as far as starting it
        # again, then we've just become a hard reboot. That means the
        # task state for the instance needs to change so that we're in one
        # of the expected task states for a hard reboot.
        soft_types = [task_states.REBOOT_STARTED,
                      task_states.REBOOT_PENDING,
                      task_states.REBOOTING]

        if instance.task_state in soft_types and reboot_type == 'HARD':
            instance.task_state = task_states.REBOOT_PENDING_HARD
            instance.save()

        self.reboot_instance(context, instance, block_device_info=None,
                             reboot_type=reboot_type)
        return

    elif (current_power_state == power_state.RUNNING and
          instance.task_state in [task_states.REBOOT_STARTED,
                                  task_states.REBOOT_STARTED_HARD,
                                  task_states.PAUSING,
                                  task_states.UNPAUSING]):
        LOG.warning(_LW("Instance in transitional state "
                        "(%(task_state)s) at start-up and power state "
                        "is (%(power_state)s), clearing task state"),
                    {'task_state': instance.task_state,
                     'power_state': current_power_state},
                    instance=instance)
        instance.task_state = None
        instance.vm_state = vm_states.ACTIVE
        instance.save()
    elif (current_power_state == power_state.PAUSED and
          instance.task_state == task_states.UNPAUSING):
        LOG.warning(_LW("Instance in transitional state "
                        "(%(task_state)s) at start-up and power state "
                        "is (%(power_state)s), clearing task state "
                        "and unpausing the instance"),
                    {'task_state': instance.task_state,
                     'power_state': current_power_state},
                    instance=instance)
        try:
            self.unpause_instance(context, instance)
        except NotImplementedError:
            # Some virt driver didn't support pause and unpause
            pass
        except Exception:
            LOG.exception(_LE('Failed to unpause instance'),
                          instance=instance)
        return

    if instance.task_state == task_states.POWERING_OFF:
        try:
            LOG.debug("Instance in transitional state %s at start-up "
                      "retrying stop request",
                      instance.task_state, instance=instance)
            self.stop_instance(context, instance, True)
        except Exception:
            # we don't want that an exception blocks the init_host
            msg = _LE('Failed to stop instance')
            LOG.exception(msg, instance=instance)
        return

    if instance.task_state == task_states.POWERING_ON:
        try:
            LOG.debug("Instance in transitional state %s at start-up "
                      "retrying start request",
                      instance.task_state, instance=instance)
            self.start_instance(context, instance)
        except Exception:
            # we don't want that an exception blocks the init_host
            msg = _LE('Failed to start instance')
            LOG.exception(msg, instance=instance)
        return

    net_info = compute_utils.get_nw_info_for_instance(instance)
    try:
        self.driver.plug_vifs(instance, net_info)
    except NotImplementedError as e:
        LOG.debug(e, instance=instance)
    except exception.VirtualInterfacePlugException:
        # we don't want an exception to block the init_host
        LOG.exception(_LE("Vifs plug failed"), instance=instance)
        self._set_instance_obj_error_state(context, instance)
        return

    if instance.task_state == task_states.RESIZE_MIGRATING:
        # We crashed during resize/migration, so roll back for safety
        try:
            # NOTE(mriedem): check old_vm_state for STOPPED here, if it's
            # not in system_metadata we default to True for backwards
            # compatibility
            power_on = (instance.system_metadata.get('old_vm_state') !=
                        vm_states.STOPPED)

            block_dev_info = self._get_instance_block_device_info(context,
                                                                  instance)

            self.driver.finish_revert_migration(context,
                instance, net_info, block_dev_info, power_on)
        except Exception:
            LOG.exception(_LE('Failed to revert crashed migration'),
                          instance=instance)
        finally:
            LOG.info(_LI('Instance found in migrating state during '
                         'startup. Resetting task_state'),
                     instance=instance)
            instance.task_state = None
            instance.save()
    if instance.task_state == task_states.MIGRATING:
        # Live migration did not complete, but instance is on this
        # host, so reset the state.
        instance.task_state = None
        instance.save(expected_task_state=[task_states.MIGRATING])

    db_state = instance.power_state
    drv_state = self._get_power_state(context, instance)
    expect_running = (db_state == power_state.RUNNING and
                      drv_state != db_state)

    LOG.debug('Current state is %(drv_state)s, state in DB is '
              '%(db_state)s.',
              {'drv_state': drv_state, 'db_state': db_state},
              instance=instance)

    if expect_running and CONF.resume_guests_state_on_host_boot:
        LOG.info(_LI('Rebooting instance after nova-compute restart.'),
                 instance=instance)

        block_device_info = \
            self._get_instance_block_device_info(context, instance)

        try:
            self.driver.resume_state_on_host_boot(
                context, instance, net_info, block_device_info)
        except NotImplementedError:
            LOG.warning(_LW('Hypervisor driver does not support '
                            'resume guests'), instance=instance)
        except Exception:
            # NOTE(vish): The instance failed to resume, so we set the
            #             instance to error and attempt to continue.
            LOG.warning(_LW('Failed to resume instance'),
                        instance=instance)
            self._set_instance_obj_error_state(context, instance)

    elif drv_state == power_state.RUNNING:
        # VMwareAPI drivers will raise an exception
        try:
            self.driver.ensure_filtering_rules_for_instance(
                                   instance, net_info)
        except NotImplementedError:
            LOG.debug('Hypervisor driver does not support '
                      'firewall rules', instance=instance)
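Taken together, _init_instance is a priority-ordered dispatch on the instance's vm_state/task_state (plus the current power state for the reboot and resume branches). The following condensed function is only a reading aid for the branches above, not Nova code; state names are simplified strings and a few intermediate branches (e.g. clearing stale pause/unpause task states) are omitted:

# Reading aid only: a condensed paraphrase of the checks above, roughly in
# the order they are evaluated.
SPAWN_STATES = {'SCHEDULING', 'BLOCK_DEVICE_MAPPING', 'NETWORKING', 'SPAWNING'}
REBUILD_STATES = {'REBUILDING', 'REBUILD_BLOCK_DEVICE_MAPPING',
                  'REBUILD_SPAWNING'}
SNAPSHOT_STATES = {'IMAGE_SNAPSHOT_PENDING', 'IMAGE_PENDING_UPLOAD',
                   'IMAGE_UPLOADING', 'IMAGE_SNAPSHOT'}
REBOOT_STATES = {'REBOOTING', 'REBOOT_PENDING', 'REBOOT_STARTED',
                 'REBOOTING_HARD', 'REBOOT_PENDING_HARD',
                 'REBOOT_STARTED_HARD'}

def startup_recovery_action(vm_state, task_state):
    """Return a short description of what _init_instance would do."""
    if vm_state == 'SOFT_DELETED' or (
            vm_state == 'ERROR' and
            task_state not in ('RESIZE_MIGRATING', 'DELETING')):
        return 'skip this instance'
    if vm_state == 'DELETED':
        return 'complete the partial deletion'
    if vm_state == 'BUILDING' or task_state in SPAWN_STATES:
        return 'interrupted spawn: mark the instance ERROR'
    if task_state in REBUILD_STATES:
        return 'interrupted rebuild: mark the instance ERROR'
    if task_state in SNAPSHOT_STATES or task_state == 'RESIZE_PREP':
        return 'clean up and clear task_state, then keep going'
    if task_state == 'DELETING':
        return 'restart the interrupted deletion'
    if task_state in REBOOT_STATES:
        return 'retry the reboot (a half-done soft reboot becomes hard)'
    if task_state == 'POWERING_OFF':
        return 'retry the stop request'
    if task_state == 'POWERING_ON':
        return 'retry the start request'
    if task_state == 'RESIZE_MIGRATING':
        return 'revert the crashed resize/migration, then clear task_state'
    if task_state == 'MIGRATING':
        return 'clear task_state (live migration did not finish)'
    return ('re-plug VIFs; resume the guest if it should be running and '
            'resume_guests_state_on_host_boot is set, otherwise ensure '
            'firewall filtering rules for a running guest')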