
来源:互联网 发布:淘宝天猫lee鼎汉店真假 编辑:程序博客网 时间:2024/06/11 11:02





复合列名(composite name)由两部分组成:hint_id,即根据产生hint数据的时间生成的timeuuid;以及message_version,即产生hint数据时所使用的消息版本号。


 * The hint schema looks like this:
 * CREATE TABLE hints (
 *   target_id uuid,
 *   hint_id timeuuid,
 *   message_version int,
 *   mutation blob,
 *   PRIMARY KEY (target_id, hint_id, message_version)
 * ) WITH COMPACT STORAGE;


/**     * Returns mutation representing a Hints to be sent to <code>address</code>     * as soon as it becomes available.  See HintedHandoffManager for more details.     */    public static RowMutation hintFor(RowMutation mutation, UUID targetId) throws IOException    {        RowMutation rm = new RowMutation(Table.SYSTEM_KS, UUIDType.instance.decompose(targetId));        UUID hintId = UUIDGen.getTimeUUID();        // determine the TTL for the RowMutation        // this is set at the smallest GCGraceSeconds for any of the CFs in the RM        // this ensures that deletes aren't "undone" by delivery of an old hint        int ttl = Integer.MAX_VALUE;        for (ColumnFamily cf : mutation.getColumnFamilies())            ttl = Math.min(ttl, cf.metadata().getGcGraceSeconds());        // serialize the hint with id and version as a composite column name        QueryPath path = new QueryPath(SystemTable.HINTS_CF, null, HintedHandOffManager.comparator.decompose(hintId, MessagingService.current_version));        rm.add(path, ByteBuffer.wrap(FBUtilities.serialize(mutation, serializer, MessagingService.current_version)), System.currentTimeMillis(), ttl);        return rm;    }






/**
 * Decides whether a hint should be written for endpoint {@code ep}.
 *
 * Returns {@code false} when hinted handoff is disabled, or when the endpoint's downtime
 * as reported by the Gossiper exceeds the configured maximum hint window. In both of those
 * cases the "past window" metric is incremented for the endpoint.
 *
 * @param ep the endpoint a hint would be stored for
 * @return {@code true} if a hint should be written, {@code false} otherwise
 */
public static boolean shouldHint(InetAddress ep)
{
    if (!DatabaseDescriptor.hintedHandoffEnabled())
    {
        HintedHandOffManager.instance.metrics.incrPastWindow(ep);
        return false;
    }

    // Still inside the hint window: hint normally.
    if (Gossiper.instance.getEndpointDowntime(ep) <= DatabaseDescriptor.getMaxHintWindow())
        return true;

    // The endpoint has been down longer than the hint window allows.
    HintedHandOffManager.instance.metrics.incrPastWindow(ep);
    logger.trace("not hinting {} which has been down {}ms", ep, Gossiper.instance.getEndpointDowntime(ep));
    return false;
}


 public static Future<Void> submitHint(final RowMutation mutation,                                          final InetAddress target,                                          final AbstractWriteResponseHandler responseHandler,                                          final ConsistencyLevel consistencyLevel)    {        // local write that time out should be handled by LocalMutationRunnable        assert !target.equals(FBUtilities.getBroadcastAddress()) : target;        HintRunnable runnable = new HintRunnable(target)        {            public void runMayThrow() throws IOException            {                logger.debug("Adding hint for {}", target);                writeHintForMutation(mutation, target);                // Notify the handler only for CL == ANY                if (responseHandler != null && consistencyLevel == ConsistencyLevel.ANY)                    responseHandler.response(null);            }        };        return submitHint(runnable);    }    private static Future<Void> submitHint(HintRunnable runnable)    {        totalHintsInProgress.incrementAndGet();        hintsInProgress.get(;        return (Future<Void>) StageManager.getStage(Stage.MUTATION).submit(runnable);    }

系统会记录正在运行的hint任务总数,以及每个目标节点上正在进行的hint任务数。这样做是为了防止因积压的hint过多而导致系统内存溢出;即使目标节点看起来在线,也需要做这项检查,以保护正常节点的写入。正在运行的hint任务总数的上限为 1024 * FBUtilities.getAvailableProcessors();当总数超过该上限、且目标节点本身已有未完成的hint任务时,写入会抛出OverloadedException。

// avoid OOMing due to excess hints.  we need to do this check even for "live" nodes, since we can            // still generate hints for those if it's overloaded or simply dead but not yet known-to-be-dead.            // The idea is that if we have over maxHintsInProgress hints in flight, this is probably due to            // a small number of nodes causing problems, so we should avoid shutting down writes completely to            // healthy nodes.  Any node with no hintsInProgress is considered healthy.            if (totalHintsInProgress.get() > maxHintsInProgress                && (hintsInProgress.get(destination).get() > 0 && shouldHint(destination)))            {                throw new OverloadedException("Too many in flight hints: " + totalHintsInProgress.get());            }






然后只需要把其rowkey转换为对应节点的ip,再调用HintedHandoff线程池中的分发线程进行数据分发。HintedHandoff线程池中的线程个数由cassandra.yaml文件中的max_hints_delivery_threads配置项决定(例如 max_hints_delivery_threads: 2)。

 // Periodic task: schedule delivery of all pending hints, then log the hint metrics.
 Runnable periodicDelivery = new Runnable()
 {
     public void run()
     {
         scheduleAllDeliveries();
         metrics.log();
     }
 };
 // First run after 10 minutes, then again 10 minutes after each run completes.
 StorageService.optionalTasks.scheduleWithFixedDelay(periodicDelivery, 10, 10, TimeUnit.MINUTES);
向单个节点发送数据时,如果发现hintedHandOff已被暂停,则不会进行发送。hint的分发可以通过nodetool的PAUSEHANDOFF命令暂停,通过RESUMEHANDOFF命令恢复。





(4)等指定节点的所有数据发送完成以后,就会强制flush hints系统表,并对hints表的所有SSTable进行一次全量compact(major compaction)。

private void deliverHintsToEndpointInternal(InetAddress endpoint) throws IOException, DigestMismatchException, InvalidRequestException, InterruptedException    {        ColumnFamilyStore hintStore =;        if (hintStore.isEmpty())            return; // nothing to do, don't confuse users by logging a no-op handoff        // check if hints delivery has been paused        if (hintedHandOffPaused)        {            logger.debug("Hints delivery process is paused, aborting");            return;        }        logger.debug("Checking remote({}) schema before delivering hints", endpoint);        try        {            waitForSchemaAgreement(endpoint);        }        catch (TimeoutException e)        {            return;        }        if (!FailureDetector.instance.isAlive(endpoint))        {            logger.debug("Endpoint {} died before hint delivery, aborting", endpoint);            return;        }        // 1. Get the key of the endpoint we need to handoff        // 2. For each column, deserialize the mutation and send it to the endpoint        // 3. Delete the subcolumn if the write was successful        // 4. Force a flush        // 5. Do major compaction to clean up all deletes etc.        // find the hints for the node using its token.        
UUID hostId = Gossiper.instance.getHostId(endpoint);"Started hinted handoff for host: {} with IP: {}", hostId, endpoint);        final ByteBuffer hostIdBytes = ByteBuffer.wrap(UUIDGen.decompose(hostId));        DecoratedKey epkey =  StorageService.getPartitioner().decorateKey(hostIdBytes);        final AtomicInteger rowsReplayed = new AtomicInteger(0);        ByteBuffer startColumn = ByteBufferUtil.EMPTY_BYTE_BUFFER;        int pageSize = PAGE_SIZE;        // read less columns (mutations) per page if they are very large        if (hintStore.getMeanColumns() > 0)        {            int averageColumnSize = (int) (hintStore.getMeanRowSize() / hintStore.getMeanColumns());            pageSize = Math.min(PAGE_SIZE, DatabaseDescriptor.getInMemoryCompactionLimit() / averageColumnSize);            pageSize = Math.max(2, pageSize); // page size of 1 does not allow actual paging b/c of >= behavior on startColumn            logger.debug("average hinted-row column size is {}; using pageSize of {}", averageColumnSize, pageSize);        }        // rate limit is in bytes per second. Uses Double.MAX_VALUE if disabled (set to 0 in cassandra.yaml).        int throttleInKB = DatabaseDescriptor.getHintedHandoffThrottleInKB();        RateLimiter rateLimiter = RateLimiter.create(throttleInKB == 0 ? 
Double.MAX_VALUE : throttleInKB * 1024);        while (true)        {            // check if hints delivery has been paused during the process            if (hintedHandOffPaused)            {                logger.debug("Hints delivery process is paused, aborting");                break;            }            QueryFilter filter = QueryFilter.getSliceFilter(epkey, new QueryPath(SystemTable.HINTS_CF), startColumn, ByteBufferUtil.EMPTY_BYTE_BUFFER, false, pageSize);            ColumnFamily hintsPage = ColumnFamilyStore.removeDeleted(hintStore.getColumnFamily(filter), (int)(System.currentTimeMillis() / 1000));            if (pagingFinished(hintsPage, startColumn))            {                if (ByteBufferUtil.EMPTY_BYTE_BUFFER.equals(startColumn))                {                    // we've started from the beginning and could not find anything (only maybe tombstones)                    break;                }                else                {                    // restart query from the first column until we read an empty row;                    // that will tell us everything was delivered successfully with no timeouts                    startColumn = ByteBufferUtil.EMPTY_BYTE_BUFFER;                    continue;                }            }            for (final IColumn hint : hintsPage.getSortedColumns())            {                // Skip tombstones:                // if we iterate quickly enough, it's possible that we could request a new page in the same millisecond                // in which the local deletion timestamp was generated on the last column in the old page, in which                // case the hint will have no columns (since it's deleted) but will still be included in the resultset                // since (even with gcgs=0) it's still a "relevant" tombstone.                
if (!hint.isLive())                    continue;                if (hintedHandOffPaused)                {                    logger.debug("Hints delivery process is paused, aborting");                    break;                }                startColumn =;                ByteBuffer[] components = comparator.split(;                int version = Int32Type.instance.compose(components[1]);                DataInputStream in = new DataInputStream(ByteBufferUtil.inputStream(hint.value()));                RowMutation rm;                try                {                    rm = RowMutation.serializer.deserialize(in, version);                }                catch (UnknownColumnFamilyException e)                {                    logger.debug("Skipping delivery of hint for deleted columnfamily", e);                    deleteHint(hostIdBytes,, hint.maxTimestamp());                    continue;                }                MessageOut<RowMutation> message = rm.createMessage();                rateLimiter.acquire(message.serializedSize(MessagingService.current_version));                WrappedRunnable callback = new WrappedRunnable()                {                    public void runMayThrow() throws IOException                    {                        rowsReplayed.incrementAndGet();                        deleteHint(hostIdBytes,, hint.maxTimestamp());                    }                };                IAsyncCallback responseHandler = new WriteResponseHandler(endpoint, WriteType.UNLOGGED_BATCH, callback);                MessagingService.instance().sendRR(message, endpoint, responseHandler);            }            // check if node is still alive and we should continue delivery process            if (!FailureDetector.instance.isAlive(endpoint))            {                logger.debug("Endpoint {} died during hint delivery, aborting", endpoint);                return;            }        }        try        {            compact().get();        }        catch 
(Exception e)        {            throw new RuntimeException(e);        }"Finished hinted handoff of %s rows to endpoint %s", rowsReplayed, endpoint));        if (hintedHandOffPaused)        {  "Hints delivery process is paused, not delivering further hints");        }    }


