Facebook Presto Source Code Walkthrough (Part 1)



I'm learning as I go and sharing my code-reading notes with you; please point out anything I get wrong.
Part 1 covers the overall code layout and the rough end-to-end execution flow.

The code I'm reading was downloaded from the official site in August 2016. The overall structure is as follows:

[Figure: top-level module layout of the Presto source tree]

The Presto client code lives in the presto-cli module.
The server code, however, is not in presto-server: opening that module shows it only contains packaging rules and uses a Maven plugin to assemble the distribution.
The actual server code is in the presto-main module; the PrestoServer class starts the server.

The rest of this post follows a client connecting to the server and running SQL statements, mainly DDL, SELECT, and INSERT.
The client entry point is the Presto class:

package com.facebook.presto.cli;

import static io.airlift.airline.SingleCommand.singleCommand;

public final class Presto
{
    private Presto() {}

    public static void main(String[] args)
            throws Exception
    {
        Console console = singleCommand(Console.class).parse(args);

        if (console.helpOption.showHelpIfRequested() ||
                console.versionOption.showVersionIfRequested()) {
            return;
        }

        console.run();
    }
}
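
The CLI parses its arguments with airlift's airline library: singleCommand(Console.class).parse(args) builds a Console instance and fills in its annotated option fields (the clientOptions object used below). Here is a minimal, self-contained sketch of that pattern; DemoCli and its two options are made up for illustration, and only the airline calls mirror what the Presto entry point does.

import io.airlift.airline.Command;
import io.airlift.airline.Option;
import io.airlift.airline.SingleCommand;

// Hypothetical demo command, not part of Presto; it only illustrates how
// singleCommand(...).parse(args) populates annotated option fields.
@Command(name = "demo-cli", description = "airline parsing demo")
public class DemoCli
{
    @Option(name = "--server", description = "coordinator address")
    public String server = "localhost:8080";

    @Option(name = "--execute", description = "statement to run and exit")
    public String execute;

    public static void main(String[] args)
    {
        DemoCli cli = SingleCommand.singleCommand(DemoCli.class).parse(args);
        System.out.println("server=" + cli.server + ", execute=" + cli.execute);
    }
}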

Step into the run method of the Console class:

public void run()
{
    ClientSession session = clientOptions.toClientSession();
    KerberosConfig kerberosConfig = clientOptions.toKerberosConfig();

    boolean hasQuery = !Strings.isNullOrEmpty(clientOptions.execute);
    boolean isFromFile = !Strings.isNullOrEmpty(clientOptions.file);

    if (!hasQuery || !isFromFile) {
        AnsiConsole.systemInstall();
    }

    initializeLogging(clientOptions.logLevelsFile);

    String query = clientOptions.execute;
    if (hasQuery) {
        query += ";";
    }

    if (isFromFile) {
        if (hasQuery) {
            throw new RuntimeException("both --execute and --file specified");
        }
        try {
            query = Files.toString(new File(clientOptions.file), UTF_8);
            hasQuery = true;
        }
        catch (IOException e) {
            throw new RuntimeException(format("Error reading from file %s: %s", clientOptions.file, e.getMessage()));
        }
    }

    try (QueryRunner queryRunner = QueryRunner.create(
            session,
            Optional.ofNullable(clientOptions.socksProxy),
            Optional.ofNullable(clientOptions.keystorePath),
            Optional.ofNullable(clientOptions.keystorePassword),
            Optional.ofNullable(clientOptions.krb5Principal),
            Optional.ofNullable(clientOptions.krb5RemoteServiceName),
            clientOptions.authenticationEnabled,
            kerberosConfig)) {
        if (hasQuery) {
            executeCommand(queryRunner, query, clientOptions.outputFormat);
        }
        else {
            runConsole(queryRunner, session);
        }
    }
}

Step into executeCommand(queryRunner, query, clientOptions.outputFormat):

private static void executeCommand(QueryRunner queryRunner, String query, OutputFormat outputFormat)
{
    StatementSplitter splitter = new StatementSplitter(query);
    for (Statement split : splitter.getCompleteStatements()) {
        if (!isEmptyStatement(split.statement())) {
            process(queryRunner, split.statement(), outputFormat, false);
        }
    }
    if (!isEmptyStatement(splitter.getPartialStatement())) {
        System.err.println("Non-terminated statement: " + splitter.getPartialStatement());
    }
}
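
executeCommand leans on StatementSplitter to cut the input into individual statements at semicolons. Below is a small sketch of how that behaves; I'm assuming StatementSplitter lives in the presto-parser module under com.facebook.presto.sql.parser, and I only use the constructor, getCompleteStatements(), statement(), and getPartialStatement() calls that appear in the method above.

import com.facebook.presto.sql.parser.StatementSplitter;

public class SplitterDemo
{
    public static void main(String[] args)
    {
        // two terminated statements plus one unterminated fragment
        StatementSplitter splitter = new StatementSplitter("SELECT 1; SHOW TABLES; SELECT 2");

        for (StatementSplitter.Statement split : splitter.getCompleteStatements()) {
            // statement() is the SQL text without its terminator
            System.out.println("complete: " + split.statement());
        }

        // "SELECT 2" has no trailing semicolon, so it is reported as partial,
        // which is exactly the "Non-terminated statement" branch above
        System.out.println("partial: " + splitter.getPartialStatement());
    }
}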

Step into the process method:

private static void process(QueryRunner queryRunner, String sql, OutputFormat outputFormat, boolean interactive)
{
    try (Query query = queryRunner.startQuery(sql)) {
        query.renderOutput(System.out, outputFormat, interactive);

        ClientSession session = queryRunner.getSession();

        // update session properties if present
        if (!query.getSetSessionProperties().isEmpty() || !query.getResetSessionProperties().isEmpty()) {
            Map<String, String> sessionProperties = new HashMap<>(session.getProperties());
            sessionProperties.putAll(query.getSetSessionProperties());
            sessionProperties.keySet().removeAll(query.getResetSessionProperties());
            session = withProperties(session, sessionProperties);
        }

        // update transaction ID if necessary
        if (query.isClearTransactionId()) {
            session = stripTransactionId(session);
        }
        if (query.getStartedTransactionId() != null) {
            session = withTransactionId(session, query.getStartedTransactionId());
        }

        queryRunner.setSession(session);
    }
    catch (RuntimeException e) {
        System.err.println("Error running command: " + e.getMessage());
        if (queryRunner.getSession().isDebug()) {
            e.printStackTrace();
        }
    }
}

Query query = queryRunner.startQuery(sql) sends a REST request to the server to run the statement:

public Query startQuery(String query)
{
    return new Query(startInternalQuery(query));
}

public StatementClient startInternalQuery(String query)
{
    return new StatementClient(httpClient, queryResultsCodec, session.get(), query);
}

public StatementClient(HttpClient httpClient, JsonCodec<QueryResults> queryResultsCodec, ClientSession session, String query)
{
    requireNonNull(httpClient, "httpClient is null");
    requireNonNull(queryResultsCodec, "queryResultsCodec is null");
    requireNonNull(session, "session is null");
    requireNonNull(query, "query is null");

    this.httpClient = httpClient;
    this.responseHandler = createFullJsonResponseHandler(queryResultsCodec);
    this.debug = session.isDebug();
    this.timeZoneId = session.getTimeZoneId();
    this.query = query;
    this.requestTimeoutNanos = session.getClientRequestTimeout().roundTo(NANOSECONDS);

    Request request = buildQueryRequest(session, query);

    JsonResponse<QueryResults> response = httpClient.execute(request, responseHandler);
    if (response.getStatusCode() != HttpStatus.OK.code() || !response.hasValue()) {
        throw requestFailedException("starting query", request, response);
    }

    processResponse(response);
}

Step into buildQueryRequest to see the target REST endpoint:

private static Request buildQueryRequest(ClientSession session, String query)
{
    Request.Builder builder = preparePost()
            .setUri(uriBuilderFrom(session.getServer()).replacePath("/v1/statement").build())
            .setBodyGenerator(createStaticBodyGenerator(query, UTF_8));

    if (session.getUser() != null) {
        builder.setHeader(PrestoHeaders.PRESTO_USER, session.getUser());
    }
    if (session.getSource() != null) {
        builder.setHeader(PrestoHeaders.PRESTO_SOURCE, session.getSource());
    }
    if (session.getCatalog() != null) {
        builder.setHeader(PrestoHeaders.PRESTO_CATALOG, session.getCatalog());
    }
    if (session.getSchema() != null) {
        builder.setHeader(PrestoHeaders.PRESTO_SCHEMA, session.getSchema());
    }
    builder.setHeader(PrestoHeaders.PRESTO_TIME_ZONE, session.getTimeZoneId());
    builder.setHeader(PrestoHeaders.PRESTO_LANGUAGE, session.getLocale().toLanguageTag());
    builder.setHeader(USER_AGENT, USER_AGENT_VALUE);

    Map<String, String> property = session.getProperties();
    for (Entry<String, String> entry : property.entrySet()) {
        builder.addHeader(PrestoHeaders.PRESTO_SESSION, entry.getKey() + "=" + entry.getValue());
    }

    builder.setHeader(PrestoHeaders.PRESTO_TRANSACTION_ID, session.getTransactionId() == null ? "NONE" : session.getTransactionId());

    return builder.build();
}

The endpoint is /v1/statement.
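
To make the protocol concrete, here is a hand-rolled sketch of roughly the same request that buildQueryRequest assembles, using only the JDK's HttpURLConnection. It assumes a coordinator on localhost:8080 and that the PrestoHeaders constants resolve to the usual X-Presto-* header names; the user, catalog, and schema values are placeholders.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class StatementPostDemo
{
    public static void main(String[] args) throws Exception
    {
        URL url = new URL("http://localhost:8080/v1/statement");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("POST");
        conn.setDoOutput(true);

        // the same session information the CLI sends as headers
        conn.setRequestProperty("X-Presto-User", "test");
        conn.setRequestProperty("X-Presto-Catalog", "tpch");
        conn.setRequestProperty("X-Presto-Schema", "tiny");

        // the SQL text is the raw request body
        try (OutputStream out = conn.getOutputStream()) {
            out.write("SELECT 1".getBytes(StandardCharsets.UTF_8));
        }

        // the response is a JSON QueryResults document; the real client keeps
        // following its nextUri field until the query is finished
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            reader.lines().forEach(System.out::println);
        }
    }
}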

On the server side, the StatementResource class serves this path:

@Path("/v1/statement")public class StatementResource{    private static final Logger log = Logger.get(StatementResource.class);    private static final Duration MAX_WAIT_TIME = new Duration(1, SECONDS);    private static final Ordering<Comparable<Duration>> WAIT_ORDERING = Ordering.natural().nullsLast();    private static final long DESIRED_RESULT_BYTES = new DataSize(1, MEGABYTE).toBytes();    private final QueryManager queryManager;    private final AccessControl accessControl;    private final SessionPropertyManager sessionPropertyManager;    private final ExchangeClientSupplier exchangeClientSupplier;    private final QueryIdGenerator queryIdGenerator;    private final ConcurrentMap<QueryId, Query> queries = new ConcurrentHashMap<>();    private final ScheduledExecutorService queryPurger = newSingleThreadScheduledExecutor(threadsNamed("query-purger"));....    @POST    @Produces(MediaType.APPLICATION_JSON)    public Response createQuery(            String statement,            @Context HttpServletRequest servletRequest,            @Context UriInfo uriInfo)            throws InterruptedException    {        assertRequest(!isNullOrEmpty(statement), "SQL statement is empty");        Session session = createSessionForRequest(servletRequest, accessControl, sessionPropertyManager, queryIdGenerator.createNextQueryId());        ExchangeClient exchangeClient = exchangeClientSupplier.get(deltaMemoryInBytes -> { });        Query query = new Query(session, statement, queryManager, exchangeClient);        queries.put(query.getQueryId(), query);        return getQueryResults(query, Optional.empty(), uriInfo, new Duration(1, MILLISECONDS));    }

In createQuery, the incoming statement is wrapped in a Query object and registered in the queries map.

When the StatementResource constructor runs, it also schedules a recurring background job:

@Inject
public StatementResource(
        QueryManager queryManager,
        AccessControl accessControl,
        SessionPropertyManager sessionPropertyManager,
        ExchangeClientSupplier exchangeClientSupplier,
        QueryIdGenerator queryIdGenerator)
{
    this.queryManager = requireNonNull(queryManager, "queryManager is null");
    this.accessControl = requireNonNull(accessControl, "accessControl is null");
    this.sessionPropertyManager = requireNonNull(sessionPropertyManager, "sessionPropertyManager is null");
    this.exchangeClientSupplier = requireNonNull(exchangeClientSupplier, "exchangeClientSupplier is null");
    this.queryIdGenerator = requireNonNull(queryIdGenerator, "queryIdGenerator is null");

    queryPurger.scheduleWithFixedDelay(new PurgeQueriesRunnable(queries, queryManager), 200, 200, MILLISECONDS);
}

PurgeQueriesRunnable is a private nested class; its run method walks the registered queries and releases the ones that have completed or are no longer tracked by the query manager:

private static class PurgeQueriesRunnable
        implements Runnable
{
    private final ConcurrentMap<QueryId, Query> queries;
    private final QueryManager queryManager;

    public PurgeQueriesRunnable(ConcurrentMap<QueryId, Query> queries, QueryManager queryManager)
    {
        this.queries = queries;
        this.queryManager = queryManager;
    }

    @Override
    public void run()
    {
        try {
            // Queries are added to the query manager before being recorded in queryIds set.
            // Therefore, we take a snapshot if queryIds before getting the live queries
            // from the query manager.  Then we remove only the queries in the snapshot and
            // not live queries set.  If we did this in the other order, a query could be
            // registered between fetching the live queries and inspecting the queryIds set.
            for (QueryId queryId : ImmutableSet.copyOf(queries.keySet())) {
                Query query = queries.get(queryId);
                Optional<QueryState> state = queryManager.getQueryState(queryId);

                // free up resources if the query completed
                if (!state.isPresent() || state.get() == QueryState.FAILED) {
                    query.dispose();
                }

                // forget about this query if the query manager is no longer tracking it
                if (!state.isPresent()) {
                    queries.remove(queryId);
                }
            }
        }
        catch (Throwable e) {
            log.warn(e, "Error removing old queries");
        }
    }
}

Now step into the createQuery method of the QueryResource class:

@POST
@Produces(MediaType.APPLICATION_JSON)
public Response createQuery(
        String statement,
        @Context HttpServletRequest servletRequest,
        @Context UriInfo uriInfo)
{
    assertRequest(!isNullOrEmpty(statement), "SQL statement is empty");

    Session session = createSessionForRequest(servletRequest, accessControl, sessionPropertyManager, queryIdGenerator.createNextQueryId());

    QueryInfo queryInfo = queryManager.createQuery(session, statement);
    URI pagesUri = uriBuilderFrom(uriInfo.getRequestUri()).appendPath(queryInfo.getQueryId().toString()).build();
    return Response.created(pagesUri).entity(queryInfo).build();
}

This leads into SqlQueryManager's createQuery method:

@Override
public QueryInfo createQuery(Session session, String query)
{
    requireNonNull(query, "query is null");
    checkArgument(!query.isEmpty(), "query must not be empty string");

    QueryId queryId = session.getQueryId();

    QueryExecution queryExecution;
    Statement statement;
    try {
        statement = sqlParser.createStatement(query);
        QueryExecutionFactory<?> queryExecutionFactory = executionFactories.get(statement.getClass());
        if (queryExecutionFactory == null) {
            throw new PrestoException(NOT_SUPPORTED, "Unsupported statement type: " + statement.getClass().getSimpleName());
        }
        if (statement instanceof Explain && ((Explain) statement).isAnalyze()) {
            Statement innerStatement = ((Explain) statement).getStatement();
            if (!(executionFactories.get(innerStatement.getClass()) instanceof SqlQueryExecutionFactory)) {
                throw new PrestoException(NOT_SUPPORTED, "EXPLAIN ANALYZE only supported for statements that are queries");
            }
        }
        queryExecution = queryExecutionFactory.createQueryExecution(queryId, query, session, statement);
    }
    catch (ParsingException | PrestoException e) {
        // This is intentionally not a method, since after the state change listener is registered
        // it's not safe to do any of this, and we had bugs before where people reused this code in a method
        URI self = locationFactory.createQueryLocation(queryId);
        QueryExecution execution = new FailedQueryExecution(queryId, query, session, self, transactionManager, queryExecutor, e);

        queries.put(queryId, execution);

        QueryInfo queryInfo = execution.getQueryInfo();
        queryMonitor.createdEvent(queryInfo);
        queryMonitor.completionEvent(queryInfo);
        stats.queryFinished(queryInfo);

        expirationQueue.add(execution);

        return queryInfo;
    }

    QueryInfo queryInfo = queryExecution.getQueryInfo();
    queryMonitor.createdEvent(queryInfo);

    queryExecution.addStateChangeListener(newValue -> {
        if (newValue.isDone()) {
            QueryInfo info = queryExecution.getQueryInfo();
            stats.queryFinished(info);
            queryMonitor.completionEvent(info);
            expirationQueue.add(queryExecution);
        }
    });

    queries.put(queryId, queryExecution);

    // start the query in the background
    if (!queueManager.submit(statement, queryExecution, queryExecutor, stats)) {
        queryExecution.fail(new PrestoException(QUERY_QUEUE_FULL, "Too many queued queries!"));
    }

    return queryInfo;
}
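
The first thing createQuery does is sqlParser.createStatement(query), and the class of the resulting AST node is the key used to pick the QueryExecutionFactory (a SELECT parses to a Query node handled by SqlQueryExecutionFactory, while DDL nodes map to their own factories). A small sketch of just that parsing step, assuming the presto-parser SqlParser and the com.facebook.presto.sql.tree classes are on the classpath:

import com.facebook.presto.sql.parser.SqlParser;
import com.facebook.presto.sql.tree.Statement;

public class ParseDemo
{
    public static void main(String[] args)
    {
        SqlParser parser = new SqlParser();

        // same call as SqlQueryManager.createQuery: SQL text in, AST node out
        Statement statement = parser.createStatement("SELECT orderkey FROM lineitem WHERE quantity > 10");

        // executionFactories is keyed by this class, so the node type decides
        // which QueryExecutionFactory builds the QueryExecution
        System.out.println(statement.getClass().getSimpleName()); // Query
    }
}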

This creates a QueryExecution and submits it through QueryQueueManager.submit. QueuedExecution then calls start, which runs the query via QueryExecution's own start method:

@Override
public void start()
{
    try (SetThreadName ignored = new SetThreadName("Query-%s", stateMachine.getQueryId())) {
        try {
            // transition to planning
            if (!stateMachine.transitionToPlanning()) {
                // query already started or finished
                return;
            }

            // analyze query
            PlanRoot plan = analyzeQuery();

            // plan distribution of query
            planDistribution(plan);

            // transition to starting
            if (!stateMachine.transitionToStarting()) {
                // query already started or finished
                return;
            }

            // if query is not finished, start the scheduler, otherwise cancel it
            SqlQueryScheduler scheduler = queryScheduler.get();

            if (!stateMachine.isDone()) {
                scheduler.start();
            }
        }
        catch (Throwable e) {
            fail(e);
            Throwables.propagateIfInstanceOf(e, Error.class);
        }
    }
}

PlanRoot plan = analyzeQuery(); analyzes the query and produces the plan.

planDistribution(plan); creates the scheduler and the stages, preparing to distribute the plan. Inside it the scheduler is built:

// build the stage execution objects (this doesn't schedule execution)
SqlQueryScheduler scheduler = new SqlQueryScheduler(
        stateMachine,
        locationFactory,
        outputStageExecutionPlan,
        nodePartitioningManager,
        nodeScheduler,
        remoteTaskFactory,
        stateMachine.getSession(),
        plan.isSummarizeTaskInfos(),
        scheduleSplitBatchSize,
        queryExecutor,
        ROOT_OUTPUT_BUFFERS,
        nodeTaskMap,
        executionPolicy);

The scheduler's constructor creates the stages:

List<SqlStageExecution> stages = createStages(
        Optional.empty(),
        new AtomicInteger(),
        locationFactory,
        plan.withBucketToPartition(Optional.of(new int[1])),
        nodeScheduler,
        remoteTaskFactory,
        session,
        splitBatchSize,
        partitioningHandle -> partitioningCache.computeIfAbsent(partitioningHandle, handle -> nodePartitioningManager.getNodePartitioningMap(session, handle)),
        executor,
        nodeTaskMap,
        stageSchedulers,
        stageLinkages);

scheduler.start(); distributes the plan to the workers as tasks.

On the worker, the task service receives the REST request in the TaskResource class. The entry point is createOrUpdateTask, which delegates to the task manager's updateTask and then to SqlTask.updateTask:

 @POST    @Path("{taskId}")    @Consumes(MediaType.APPLICATION_JSON)    @Produces(MediaType.APPLICATION_JSON)    public Response createOrUpdateTask(@PathParam("taskId") TaskId taskId, TaskUpdateRequest taskUpdateRequest, @Context UriInfo uriInfo)    {        requireNonNull(taskUpdateRequest, "taskUpdateRequest is null");        Session session = taskUpdateRequest.getSession().toSession(sessionPropertyManager);        TaskInfo taskInfo = taskManager.updateTask(session,                taskId,                taskUpdateRequest.getFragment(),                taskUpdateRequest.getSources(),                taskUpdateRequest.getOutputIds());        if (shouldSummarize(uriInfo)) {            taskInfo = taskInfo.summarize();        }        return Response.ok().entity(taskInfo).build();    }
    @Override    public TaskInfo updateTask(Session session, TaskId taskId, Optional<PlanFragment> fragment, List<TaskSource> sources, OutputBuffers outputBuffers)    {        requireNonNull(session, "session is null");        requireNonNull(taskId, "taskId is null");        requireNonNull(fragment, "fragment is null");        requireNonNull(sources, "sources is null");        requireNonNull(outputBuffers, "outputBuffers is null");        if (resourceOvercommit(session)) {            // TODO: This should have been done when the QueryContext was created. However, the session isn't available at that point.            queryContexts.getUnchecked(taskId.getQueryId()).setResourceOvercommit();        }        SqlTask sqlTask = tasks.getUnchecked(taskId);        sqlTask.recordHeartbeat();        return sqlTask.updateTask(session, fragment, sources, outputBuffers);    }
 public TaskInfo updateTask(Session session, Optional<PlanFragment> fragment, List<TaskSource> sources, OutputBuffers outputBuffers)    {        try {            // assure the task execution is only created once            SqlTaskExecution taskExecution;            synchronized (this) {                // is task already complete?                TaskHolder taskHolder = taskHolderReference.get();                if (taskHolder.isFinished()) {                    return taskHolder.getFinalTaskInfo();                }                taskExecution = taskHolder.getTaskExecution();                if (taskExecution == null) {                    checkState(fragment.isPresent(), "fragment must be present");                    taskExecution = sqlTaskExecutionFactory.create(session, queryContext, taskStateMachine, sharedBuffer, fragment.get(), sources);                    taskHolderReference.compareAndSet(taskHolder, new TaskHolder(taskExecution));                    needsPlan.set(false);                }            }            if (taskExecution != null) {                // addSources checks for task completion, so update the buffers first and the task might complete earlier                sharedBuffer.setOutputBuffers(outputBuffers);                taskExecution.addSources(sources);            }        }        catch (Error e) {            failed(e);            throw e;        }        catch (RuntimeException e) {            failed(e);        }        return getTaskInfo();    }

The SqlTaskExecution created above via sqlTaskExecutionFactory.create is constructed as follows:

private SqlTaskExecution(
        TaskStateMachine taskStateMachine,
        TaskContext taskContext,
        SharedBuffer sharedBuffer,
        PlanFragment fragment,
        LocalExecutionPlanner planner,
        TaskExecutor taskExecutor,
        QueryMonitor queryMonitor,
        Executor notificationExecutor)
{
    this.taskStateMachine = requireNonNull(taskStateMachine, "taskStateMachine is null");
    this.taskId = taskStateMachine.getTaskId();
    this.taskContext = requireNonNull(taskContext, "taskContext is null");
    this.sharedBuffer = requireNonNull(sharedBuffer, "sharedBuffer is null");
    this.taskExecutor = requireNonNull(taskExecutor, "driverExecutor is null");
    this.notificationExecutor = requireNonNull(notificationExecutor, "notificationExecutor is null");
    this.queryMonitor = requireNonNull(queryMonitor, "queryMonitor is null");

    try (SetThreadName ignored = new SetThreadName("Task-%s", taskId)) {
        List<DriverFactory> driverFactories;
        try {
            LocalExecutionPlan localExecutionPlan = planner.plan(
                    taskContext.getSession(),
                    fragment.getRoot(),
                    fragment.getSymbols(),
                    fragment.getPartitionFunction(),
                    sharedBuffer,
                    fragment.getPartitioning().isSingleNode(),
                    fragment.getPartitionedSource() == null);
            driverFactories = localExecutionPlan.getDriverFactories();
        }
        catch (Throwable e) {
            // planning failed
            taskStateMachine.failed(e);
            throw Throwables.propagate(e);
        }

        // index driver factories
        DriverSplitRunnerFactory partitionedDriverFactory = null;
        ImmutableList.Builder<DriverSplitRunnerFactory> unpartitionedDriverFactories = ImmutableList.builder();
        for (DriverFactory driverFactory : driverFactories) {
            if (driverFactory.getSourceIds().contains(fragment.getPartitionedSource())) {
                checkState(partitionedDriverFactory == null, "multiple partitioned sources are not supported");
                partitionedDriverFactory = new DriverSplitRunnerFactory(driverFactory);
            }
            else {
                unpartitionedDriverFactories.add(new DriverSplitRunnerFactory(driverFactory));
            }
        }
        this.unpartitionedDriverFactories = unpartitionedDriverFactories.build();

        if (fragment.getPartitionedSource() != null) {
            checkArgument(partitionedDriverFactory != null, "Fragment is partitioned, but no partitioned driver found");
        }
        this.partitionedSourceId = fragment.getPartitionedSource();
        this.partitionedDriverFactory = partitionedDriverFactory;

        // don't register the task if it is already completed (most likely failed during planning above)
        if (!taskStateMachine.getState().isDone()) {
            taskHandle = taskExecutor.addTask(taskId, sharedBuffer::getUtilization, getInitialSplitsPerNode(taskContext.getSession()), getSplitConcurrencyAdjustmentInterval(taskContext.getSession()));
            taskStateMachine.addStateChangeListener(new RemoveTaskHandleWhenDone(taskExecutor, taskHandle));
            taskStateMachine.addStateChangeListener(state -> {
                if (state.isDone()) {
                    for (DriverFactory factory : driverFactories) {
                        factory.close();
                    }
                }
            });
        }
        else {
            taskHandle = null;
        }

        sharedBuffer.addStateChangeListener(new CheckTaskCompletionOnBufferFinish(SqlTaskExecution.this));
    }
}

In this constructor the local execution plan is built, and multiple drivers are created, each one executing its own split.
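
To picture what "each driver executes its own split" means: the LocalExecutionPlanner turns the fragment into DriverFactory objects, and each driver instance pulls the data of one split through a fixed chain of operators. The toy below is purely illustrative and is not Presto's operator API; it uses JDK UnaryOperator functions as stand-ins for operators and a list of integers as the "pages" of a split.

import java.util.Arrays;
import java.util.List;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;

// Hypothetical illustration of the driver model: one driver, one split,
// a chain of operators transforming "pages" of rows. Not Presto's API.
public class DriverModelDemo
{
    public static void main(String[] args)
    {
        // a "split" here is just a slice of the table's rows
        List<Integer> split = Arrays.asList(1, 2, 3, 4, 5);

        // a tiny pipeline: filter then project, the way a driver chains operators
        UnaryOperator<List<Integer>> filter =
                page -> page.stream().filter(x -> x % 2 == 1).collect(Collectors.toList());
        UnaryOperator<List<Integer>> project =
                page -> page.stream().map(x -> x * 10).collect(Collectors.toList());

        List<Integer> page = split;
        for (UnaryOperator<List<Integer>> op : Arrays.asList(filter, project)) {
            page = op.apply(page);
        }
        System.out.println(page); // [10, 30, 50]
    }
}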

Putting it all together, the overall flow is:
[Figure: end-to-end flow from the CLI, through StatementResource and SqlQueryManager on the coordinator, down to TaskResource and SqlTaskExecution on the workers]

How the results are fetched back to the client is left for a later part.
