diff options
Diffstat (limited to 'vendor/google.golang.org/grpc/internal/transport/http2_server.go')
-rw-r--r-- | vendor/google.golang.org/grpc/internal/transport/http2_server.go | 1461 |
1 files changed, 1461 insertions, 0 deletions
diff --git a/vendor/google.golang.org/grpc/internal/transport/http2_server.go b/vendor/google.golang.org/grpc/internal/transport/http2_server.go new file mode 100644 index 000000000..bc3da7067 --- /dev/null +++ b/vendor/google.golang.org/grpc/internal/transport/http2_server.go @@ -0,0 +1,1461 @@ +/* + * + * Copyright 2014 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package transport + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "math" + "net" + "net/http" + "strconv" + "sync" + "sync/atomic" + "time" + + "github.com/golang/protobuf/proto" + "golang.org/x/net/http2" + "golang.org/x/net/http2/hpack" + "google.golang.org/grpc/internal/grpcutil" + "google.golang.org/grpc/internal/syscall" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/internal/channelz" + "google.golang.org/grpc/internal/grpcrand" + "google.golang.org/grpc/internal/grpcsync" + "google.golang.org/grpc/keepalive" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/peer" + "google.golang.org/grpc/stats" + "google.golang.org/grpc/status" + "google.golang.org/grpc/tap" +) + +var ( + // ErrIllegalHeaderWrite indicates that setting header is illegal because of + // the stream's state. + ErrIllegalHeaderWrite = status.Error(codes.Internal, "transport: SendHeader called multiple times") + // ErrHeaderListSizeLimitViolation indicates that the header list size is larger + // than the limit set by peer. + ErrHeaderListSizeLimitViolation = status.Error(codes.Internal, "transport: trying to send header list size larger than the limit set by peer") +) + +// serverConnectionCounter counts the number of connections a server has seen +// (equal to the number of http2Servers created). Must be accessed atomically. +var serverConnectionCounter uint64 + +// http2Server implements the ServerTransport interface with HTTP2. +type http2Server struct { + lastRead int64 // Keep this field 64-bit aligned. Accessed atomically. + ctx context.Context + done chan struct{} + conn net.Conn + loopy *loopyWriter + readerDone chan struct{} // sync point to enable testing. + writerDone chan struct{} // sync point to enable testing. + remoteAddr net.Addr + localAddr net.Addr + authInfo credentials.AuthInfo // auth info about the connection + inTapHandle tap.ServerInHandle + framer *framer + // The max number of concurrent streams. + maxStreams uint32 + // controlBuf delivers all the control related tasks (e.g., window + // updates, reset streams, and various settings) to the controller. + controlBuf *controlBuffer + fc *trInFlow + stats []stats.Handler + // Keepalive and max-age parameters for the server. + kp keepalive.ServerParameters + // Keepalive enforcement policy. + kep keepalive.EnforcementPolicy + // The time instance last ping was received. + lastPingAt time.Time + // Number of times the client has violated keepalive ping policy so far. + pingStrikes uint8 + // Flag to signify that number of ping strikes should be reset to 0. + // This is set whenever data or header frames are sent. + // 1 means yes. + resetPingStrikes uint32 // Accessed atomically. + initialWindowSize int32 + bdpEst *bdpEstimator + maxSendHeaderListSize *uint32 + + mu sync.Mutex // guard the following + + // drainEvent is initialized when Drain() is called the first time. After + // which the server writes out the first GoAway(with ID 2^31-1) frame. Then + // an independent goroutine will be launched to later send the second + // GoAway. During this time we don't want to write another first GoAway(with + // ID 2^31 -1) frame. Thus call to Drain() will be a no-op if drainEvent is + // already initialized since draining is already underway. + drainEvent *grpcsync.Event + state transportState + activeStreams map[uint32]*Stream + // idle is the time instant when the connection went idle. + // This is either the beginning of the connection or when the number of + // RPCs go down to 0. + // When the connection is busy, this value is set to 0. + idle time.Time + + // Fields below are for channelz metric collection. + channelzID *channelz.Identifier + czData *channelzData + bufferPool *bufferPool + + connectionID uint64 + + // maxStreamMu guards the maximum stream ID + // This lock may not be taken if mu is already held. + maxStreamMu sync.Mutex + maxStreamID uint32 // max stream ID ever seen +} + +// NewServerTransport creates a http2 transport with conn and configuration +// options from config. +// +// It returns a non-nil transport and a nil error on success. On failure, it +// returns a nil transport and a non-nil error. For a special case where the +// underlying conn gets closed before the client preface could be read, it +// returns a nil transport and a nil error. +func NewServerTransport(conn net.Conn, config *ServerConfig) (_ ServerTransport, err error) { + var authInfo credentials.AuthInfo + rawConn := conn + if config.Credentials != nil { + var err error + conn, authInfo, err = config.Credentials.ServerHandshake(rawConn) + if err != nil { + // ErrConnDispatched means that the connection was dispatched away + // from gRPC; those connections should be left open. io.EOF means + // the connection was closed before handshaking completed, which can + // happen naturally from probers. Return these errors directly. + if err == credentials.ErrConnDispatched || err == io.EOF { + return nil, err + } + return nil, connectionErrorf(false, err, "ServerHandshake(%q) failed: %v", rawConn.RemoteAddr(), err) + } + } + writeBufSize := config.WriteBufferSize + readBufSize := config.ReadBufferSize + maxHeaderListSize := defaultServerMaxHeaderListSize + if config.MaxHeaderListSize != nil { + maxHeaderListSize = *config.MaxHeaderListSize + } + framer := newFramer(conn, writeBufSize, readBufSize, maxHeaderListSize) + // Send initial settings as connection preface to client. + isettings := []http2.Setting{{ + ID: http2.SettingMaxFrameSize, + Val: http2MaxFrameLen, + }} + // TODO(zhaoq): Have a better way to signal "no limit" because 0 is + // permitted in the HTTP2 spec. + maxStreams := config.MaxStreams + if maxStreams == 0 { + maxStreams = math.MaxUint32 + } else { + isettings = append(isettings, http2.Setting{ + ID: http2.SettingMaxConcurrentStreams, + Val: maxStreams, + }) + } + dynamicWindow := true + iwz := int32(initialWindowSize) + if config.InitialWindowSize >= defaultWindowSize { + iwz = config.InitialWindowSize + dynamicWindow = false + } + icwz := int32(initialWindowSize) + if config.InitialConnWindowSize >= defaultWindowSize { + icwz = config.InitialConnWindowSize + dynamicWindow = false + } + if iwz != defaultWindowSize { + isettings = append(isettings, http2.Setting{ + ID: http2.SettingInitialWindowSize, + Val: uint32(iwz)}) + } + if config.MaxHeaderListSize != nil { + isettings = append(isettings, http2.Setting{ + ID: http2.SettingMaxHeaderListSize, + Val: *config.MaxHeaderListSize, + }) + } + if config.HeaderTableSize != nil { + isettings = append(isettings, http2.Setting{ + ID: http2.SettingHeaderTableSize, + Val: *config.HeaderTableSize, + }) + } + if err := framer.fr.WriteSettings(isettings...); err != nil { + return nil, connectionErrorf(false, err, "transport: %v", err) + } + // Adjust the connection flow control window if needed. + if delta := uint32(icwz - defaultWindowSize); delta > 0 { + if err := framer.fr.WriteWindowUpdate(0, delta); err != nil { + return nil, connectionErrorf(false, err, "transport: %v", err) + } + } + kp := config.KeepaliveParams + if kp.MaxConnectionIdle == 0 { + kp.MaxConnectionIdle = defaultMaxConnectionIdle + } + if kp.MaxConnectionAge == 0 { + kp.MaxConnectionAge = defaultMaxConnectionAge + } + // Add a jitter to MaxConnectionAge. + kp.MaxConnectionAge += getJitter(kp.MaxConnectionAge) + if kp.MaxConnectionAgeGrace == 0 { + kp.MaxConnectionAgeGrace = defaultMaxConnectionAgeGrace + } + if kp.Time == 0 { + kp.Time = defaultServerKeepaliveTime + } + if kp.Timeout == 0 { + kp.Timeout = defaultServerKeepaliveTimeout + } + if kp.Time != infinity { + if err = syscall.SetTCPUserTimeout(conn, kp.Timeout); err != nil { + return nil, connectionErrorf(false, err, "transport: failed to set TCP_USER_TIMEOUT: %v", err) + } + } + kep := config.KeepalivePolicy + if kep.MinTime == 0 { + kep.MinTime = defaultKeepalivePolicyMinTime + } + + done := make(chan struct{}) + t := &http2Server{ + ctx: setConnection(context.Background(), rawConn), + done: done, + conn: conn, + remoteAddr: conn.RemoteAddr(), + localAddr: conn.LocalAddr(), + authInfo: authInfo, + framer: framer, + readerDone: make(chan struct{}), + writerDone: make(chan struct{}), + maxStreams: maxStreams, + inTapHandle: config.InTapHandle, + fc: &trInFlow{limit: uint32(icwz)}, + state: reachable, + activeStreams: make(map[uint32]*Stream), + stats: config.StatsHandlers, + kp: kp, + idle: time.Now(), + kep: kep, + initialWindowSize: iwz, + czData: new(channelzData), + bufferPool: newBufferPool(), + } + // Add peer information to the http2server context. + t.ctx = peer.NewContext(t.ctx, t.getPeer()) + + t.controlBuf = newControlBuffer(t.done) + if dynamicWindow { + t.bdpEst = &bdpEstimator{ + bdp: initialWindowSize, + updateFlowControl: t.updateFlowControl, + } + } + for _, sh := range t.stats { + t.ctx = sh.TagConn(t.ctx, &stats.ConnTagInfo{ + RemoteAddr: t.remoteAddr, + LocalAddr: t.localAddr, + }) + connBegin := &stats.ConnBegin{} + sh.HandleConn(t.ctx, connBegin) + } + t.channelzID, err = channelz.RegisterNormalSocket(t, config.ChannelzParentID, fmt.Sprintf("%s -> %s", t.remoteAddr, t.localAddr)) + if err != nil { + return nil, err + } + + t.connectionID = atomic.AddUint64(&serverConnectionCounter, 1) + t.framer.writer.Flush() + + defer func() { + if err != nil { + t.Close(err) + } + }() + + // Check the validity of client preface. + preface := make([]byte, len(clientPreface)) + if _, err := io.ReadFull(t.conn, preface); err != nil { + // In deployments where a gRPC server runs behind a cloud load balancer + // which performs regular TCP level health checks, the connection is + // closed immediately by the latter. Returning io.EOF here allows the + // grpc server implementation to recognize this scenario and suppress + // logging to reduce spam. + if err == io.EOF { + return nil, io.EOF + } + return nil, connectionErrorf(false, err, "transport: http2Server.HandleStreams failed to receive the preface from client: %v", err) + } + if !bytes.Equal(preface, clientPreface) { + return nil, connectionErrorf(false, nil, "transport: http2Server.HandleStreams received bogus greeting from client: %q", preface) + } + + frame, err := t.framer.fr.ReadFrame() + if err == io.EOF || err == io.ErrUnexpectedEOF { + return nil, err + } + if err != nil { + return nil, connectionErrorf(false, err, "transport: http2Server.HandleStreams failed to read initial settings frame: %v", err) + } + atomic.StoreInt64(&t.lastRead, time.Now().UnixNano()) + sf, ok := frame.(*http2.SettingsFrame) + if !ok { + return nil, connectionErrorf(false, nil, "transport: http2Server.HandleStreams saw invalid preface type %T from client", frame) + } + t.handleSettings(sf) + + go func() { + t.loopy = newLoopyWriter(serverSide, t.framer, t.controlBuf, t.bdpEst) + t.loopy.ssGoAwayHandler = t.outgoingGoAwayHandler + err := t.loopy.run() + if logger.V(logLevel) { + logger.Infof("transport: loopyWriter exited. Closing connection. Err: %v", err) + } + t.conn.Close() + t.controlBuf.finish() + close(t.writerDone) + }() + go t.keepalive() + return t, nil +} + +// operateHeaders takes action on the decoded headers. Returns an error if fatal +// error encountered and transport needs to close, otherwise returns nil. +func (t *http2Server) operateHeaders(frame *http2.MetaHeadersFrame, handle func(*Stream), traceCtx func(context.Context, string) context.Context) error { + // Acquire max stream ID lock for entire duration + t.maxStreamMu.Lock() + defer t.maxStreamMu.Unlock() + + streamID := frame.Header().StreamID + + // frame.Truncated is set to true when framer detects that the current header + // list size hits MaxHeaderListSize limit. + if frame.Truncated { + t.controlBuf.put(&cleanupStream{ + streamID: streamID, + rst: true, + rstCode: http2.ErrCodeFrameSize, + onWrite: func() {}, + }) + return nil + } + + if streamID%2 != 1 || streamID <= t.maxStreamID { + // illegal gRPC stream id. + return fmt.Errorf("received an illegal stream id: %v. headers frame: %+v", streamID, frame) + } + t.maxStreamID = streamID + + buf := newRecvBuffer() + s := &Stream{ + id: streamID, + st: t, + buf: buf, + fc: &inFlow{limit: uint32(t.initialWindowSize)}, + } + var ( + // if false, content-type was missing or invalid + isGRPC = false + contentType = "" + mdata = make(map[string][]string) + httpMethod string + // these are set if an error is encountered while parsing the headers + protocolError bool + headerError *status.Status + + timeoutSet bool + timeout time.Duration + ) + + for _, hf := range frame.Fields { + switch hf.Name { + case "content-type": + contentSubtype, validContentType := grpcutil.ContentSubtype(hf.Value) + if !validContentType { + contentType = hf.Value + break + } + mdata[hf.Name] = append(mdata[hf.Name], hf.Value) + s.contentSubtype = contentSubtype + isGRPC = true + case "grpc-encoding": + s.recvCompress = hf.Value + case ":method": + httpMethod = hf.Value + case ":path": + s.method = hf.Value + case "grpc-timeout": + timeoutSet = true + var err error + if timeout, err = decodeTimeout(hf.Value); err != nil { + headerError = status.Newf(codes.Internal, "malformed grpc-timeout: %v", err) + } + // "Transports must consider requests containing the Connection header + // as malformed." - A41 + case "connection": + if logger.V(logLevel) { + logger.Errorf("transport: http2Server.operateHeaders parsed a :connection header which makes a request malformed as per the HTTP/2 spec") + } + protocolError = true + default: + if isReservedHeader(hf.Name) && !isWhitelistedHeader(hf.Name) { + break + } + v, err := decodeMetadataHeader(hf.Name, hf.Value) + if err != nil { + headerError = status.Newf(codes.Internal, "malformed binary metadata %q in header %q: %v", hf.Value, hf.Name, err) + logger.Warningf("Failed to decode metadata header (%q, %q): %v", hf.Name, hf.Value, err) + break + } + mdata[hf.Name] = append(mdata[hf.Name], v) + } + } + + // "If multiple Host headers or multiple :authority headers are present, the + // request must be rejected with an HTTP status code 400 as required by Host + // validation in RFC 7230 ยง5.4, gRPC status code INTERNAL, or RST_STREAM + // with HTTP/2 error code PROTOCOL_ERROR." - A41. Since this is a HTTP/2 + // error, this takes precedence over a client not speaking gRPC. + if len(mdata[":authority"]) > 1 || len(mdata["host"]) > 1 { + errMsg := fmt.Sprintf("num values of :authority: %v, num values of host: %v, both must only have 1 value as per HTTP/2 spec", len(mdata[":authority"]), len(mdata["host"])) + if logger.V(logLevel) { + logger.Errorf("transport: %v", errMsg) + } + t.controlBuf.put(&earlyAbortStream{ + httpStatus: http.StatusBadRequest, + streamID: streamID, + contentSubtype: s.contentSubtype, + status: status.New(codes.Internal, errMsg), + rst: !frame.StreamEnded(), + }) + return nil + } + + if protocolError { + t.controlBuf.put(&cleanupStream{ + streamID: streamID, + rst: true, + rstCode: http2.ErrCodeProtocol, + onWrite: func() {}, + }) + return nil + } + if !isGRPC { + t.controlBuf.put(&earlyAbortStream{ + httpStatus: http.StatusUnsupportedMediaType, + streamID: streamID, + contentSubtype: s.contentSubtype, + status: status.Newf(codes.InvalidArgument, "invalid gRPC request content-type %q", contentType), + rst: !frame.StreamEnded(), + }) + return nil + } + if headerError != nil { + t.controlBuf.put(&earlyAbortStream{ + httpStatus: http.StatusBadRequest, + streamID: streamID, + contentSubtype: s.contentSubtype, + status: headerError, + rst: !frame.StreamEnded(), + }) + return nil + } + + // "If :authority is missing, Host must be renamed to :authority." - A41 + if len(mdata[":authority"]) == 0 { + // No-op if host isn't present, no eventual :authority header is a valid + // RPC. + if host, ok := mdata["host"]; ok { + mdata[":authority"] = host + delete(mdata, "host") + } + } else { + // "If :authority is present, Host must be discarded" - A41 + delete(mdata, "host") + } + + if frame.StreamEnded() { + // s is just created by the caller. No lock needed. + s.state = streamReadDone + } + if timeoutSet { + s.ctx, s.cancel = context.WithTimeout(t.ctx, timeout) + } else { + s.ctx, s.cancel = context.WithCancel(t.ctx) + } + + // Attach the received metadata to the context. + if len(mdata) > 0 { + s.ctx = metadata.NewIncomingContext(s.ctx, mdata) + if statsTags := mdata["grpc-tags-bin"]; len(statsTags) > 0 { + s.ctx = stats.SetIncomingTags(s.ctx, []byte(statsTags[len(statsTags)-1])) + } + if statsTrace := mdata["grpc-trace-bin"]; len(statsTrace) > 0 { + s.ctx = stats.SetIncomingTrace(s.ctx, []byte(statsTrace[len(statsTrace)-1])) + } + } + t.mu.Lock() + if t.state != reachable { + t.mu.Unlock() + s.cancel() + return nil + } + if uint32(len(t.activeStreams)) >= t.maxStreams { + t.mu.Unlock() + t.controlBuf.put(&cleanupStream{ + streamID: streamID, + rst: true, + rstCode: http2.ErrCodeRefusedStream, + onWrite: func() {}, + }) + s.cancel() + return nil + } + if httpMethod != http.MethodPost { + t.mu.Unlock() + errMsg := fmt.Sprintf("http2Server.operateHeaders parsed a :method field: %v which should be POST", httpMethod) + if logger.V(logLevel) { + logger.Infof("transport: %v", errMsg) + } + t.controlBuf.put(&earlyAbortStream{ + httpStatus: 405, + streamID: streamID, + contentSubtype: s.contentSubtype, + status: status.New(codes.Internal, errMsg), + rst: !frame.StreamEnded(), + }) + s.cancel() + return nil + } + if t.inTapHandle != nil { + var err error + if s.ctx, err = t.inTapHandle(s.ctx, &tap.Info{FullMethodName: s.method}); err != nil { + t.mu.Unlock() + if logger.V(logLevel) { + logger.Infof("transport: http2Server.operateHeaders got an error from InTapHandle: %v", err) + } + stat, ok := status.FromError(err) + if !ok { + stat = status.New(codes.PermissionDenied, err.Error()) + } + t.controlBuf.put(&earlyAbortStream{ + httpStatus: 200, + streamID: s.id, + contentSubtype: s.contentSubtype, + status: stat, + rst: !frame.StreamEnded(), + }) + return nil + } + } + t.activeStreams[streamID] = s + if len(t.activeStreams) == 1 { + t.idle = time.Time{} + } + t.mu.Unlock() + if channelz.IsOn() { + atomic.AddInt64(&t.czData.streamsStarted, 1) + atomic.StoreInt64(&t.czData.lastStreamCreatedTime, time.Now().UnixNano()) + } + s.requestRead = func(n int) { + t.adjustWindow(s, uint32(n)) + } + s.ctx = traceCtx(s.ctx, s.method) + for _, sh := range t.stats { + s.ctx = sh.TagRPC(s.ctx, &stats.RPCTagInfo{FullMethodName: s.method}) + inHeader := &stats.InHeader{ + FullMethod: s.method, + RemoteAddr: t.remoteAddr, + LocalAddr: t.localAddr, + Compression: s.recvCompress, + WireLength: int(frame.Header().Length), + Header: metadata.MD(mdata).Copy(), + } + sh.HandleRPC(s.ctx, inHeader) + } + s.ctxDone = s.ctx.Done() + s.wq = newWriteQuota(defaultWriteQuota, s.ctxDone) + s.trReader = &transportReader{ + reader: &recvBufferReader{ + ctx: s.ctx, + ctxDone: s.ctxDone, + recv: s.buf, + freeBuffer: t.bufferPool.put, + }, + windowHandler: func(n int) { + t.updateWindow(s, uint32(n)) + }, + } + // Register the stream with loopy. + t.controlBuf.put(®isterStream{ + streamID: s.id, + wq: s.wq, + }) + handle(s) + return nil +} + +// HandleStreams receives incoming streams using the given handler. This is +// typically run in a separate goroutine. +// traceCtx attaches trace to ctx and returns the new context. +func (t *http2Server) HandleStreams(handle func(*Stream), traceCtx func(context.Context, string) context.Context) { + defer close(t.readerDone) + for { + t.controlBuf.throttle() + frame, err := t.framer.fr.ReadFrame() + atomic.StoreInt64(&t.lastRead, time.Now().UnixNano()) + if err != nil { + if se, ok := err.(http2.StreamError); ok { + if logger.V(logLevel) { + logger.Warningf("transport: http2Server.HandleStreams encountered http2.StreamError: %v", se) + } + t.mu.Lock() + s := t.activeStreams[se.StreamID] + t.mu.Unlock() + if s != nil { + t.closeStream(s, true, se.Code, false) + } else { + t.controlBuf.put(&cleanupStream{ + streamID: se.StreamID, + rst: true, + rstCode: se.Code, + onWrite: func() {}, + }) + } + continue + } + if err == io.EOF || err == io.ErrUnexpectedEOF { + t.Close(err) + return + } + t.Close(err) + return + } + switch frame := frame.(type) { + case *http2.MetaHeadersFrame: + if err := t.operateHeaders(frame, handle, traceCtx); err != nil { + t.Close(err) + break + } + case *http2.DataFrame: + t.handleData(frame) + case *http2.RSTStreamFrame: + t.handleRSTStream(frame) + case *http2.SettingsFrame: + t.handleSettings(frame) + case *http2.PingFrame: + t.handlePing(frame) + case *http2.WindowUpdateFrame: + t.handleWindowUpdate(frame) + case *http2.GoAwayFrame: + // TODO: Handle GoAway from the client appropriately. + default: + if logger.V(logLevel) { + logger.Errorf("transport: http2Server.HandleStreams found unhandled frame type %v.", frame) + } + } + } +} + +func (t *http2Server) getStream(f http2.Frame) (*Stream, bool) { + t.mu.Lock() + defer t.mu.Unlock() + if t.activeStreams == nil { + // The transport is closing. + return nil, false + } + s, ok := t.activeStreams[f.Header().StreamID] + if !ok { + // The stream is already done. + return nil, false + } + return s, true +} + +// adjustWindow sends out extra window update over the initial window size +// of stream if the application is requesting data larger in size than +// the window. +func (t *http2Server) adjustWindow(s *Stream, n uint32) { + if w := s.fc.maybeAdjust(n); w > 0 { + t.controlBuf.put(&outgoingWindowUpdate{streamID: s.id, increment: w}) + } + +} + +// updateWindow adjusts the inbound quota for the stream and the transport. +// Window updates will deliver to the controller for sending when +// the cumulative quota exceeds the corresponding threshold. +func (t *http2Server) updateWindow(s *Stream, n uint32) { + if w := s.fc.onRead(n); w > 0 { + t.controlBuf.put(&outgoingWindowUpdate{streamID: s.id, + increment: w, + }) + } +} + +// updateFlowControl updates the incoming flow control windows +// for the transport and the stream based on the current bdp +// estimation. +func (t *http2Server) updateFlowControl(n uint32) { + t.mu.Lock() + for _, s := range t.activeStreams { + s.fc.newLimit(n) + } + t.initialWindowSize = int32(n) + t.mu.Unlock() + t.controlBuf.put(&outgoingWindowUpdate{ + streamID: 0, + increment: t.fc.newLimit(n), + }) + t.controlBuf.put(&outgoingSettings{ + ss: []http2.Setting{ + { + ID: http2.SettingInitialWindowSize, + Val: n, + }, + }, + }) + +} + +func (t *http2Server) handleData(f *http2.DataFrame) { + size := f.Header().Length + var sendBDPPing bool + if t.bdpEst != nil { + sendBDPPing = t.bdpEst.add(size) + } + // Decouple connection's flow control from application's read. + // An update on connection's flow control should not depend on + // whether user application has read the data or not. Such a + // restriction is already imposed on the stream's flow control, + // and therefore the sender will be blocked anyways. + // Decoupling the connection flow control will prevent other + // active(fast) streams from starving in presence of slow or + // inactive streams. + if w := t.fc.onData(size); w > 0 { + t.controlBuf.put(&outgoingWindowUpdate{ + streamID: 0, + increment: w, + }) + } + if sendBDPPing { + // Avoid excessive ping detection (e.g. in an L7 proxy) + // by sending a window update prior to the BDP ping. + if w := t.fc.reset(); w > 0 { + t.controlBuf.put(&outgoingWindowUpdate{ + streamID: 0, + increment: w, + }) + } + t.controlBuf.put(bdpPing) + } + // Select the right stream to dispatch. + s, ok := t.getStream(f) + if !ok { + return + } + if s.getState() == streamReadDone { + t.closeStream(s, true, http2.ErrCodeStreamClosed, false) + return + } + if size > 0 { + if err := s.fc.onData(size); err != nil { + t.closeStream(s, true, http2.ErrCodeFlowControl, false) + return + } + if f.Header().Flags.Has(http2.FlagDataPadded) { + if w := s.fc.onRead(size - uint32(len(f.Data()))); w > 0 { + t.controlBuf.put(&outgoingWindowUpdate{s.id, w}) + } + } + // TODO(bradfitz, zhaoq): A copy is required here because there is no + // guarantee f.Data() is consumed before the arrival of next frame. + // Can this copy be eliminated? + if len(f.Data()) > 0 { + buffer := t.bufferPool.get() + buffer.Reset() + buffer.Write(f.Data()) + s.write(recvMsg{buffer: buffer}) + } + } + if f.StreamEnded() { + // Received the end of stream from the client. + s.compareAndSwapState(streamActive, streamReadDone) + s.write(recvMsg{err: io.EOF}) + } +} + +func (t *http2Server) handleRSTStream(f *http2.RSTStreamFrame) { + // If the stream is not deleted from the transport's active streams map, then do a regular close stream. + if s, ok := t.getStream(f); ok { + t.closeStream(s, false, 0, false) + return + } + // If the stream is already deleted from the active streams map, then put a cleanupStream item into controlbuf to delete the stream from loopy writer's established streams map. + t.controlBuf.put(&cleanupStream{ + streamID: f.Header().StreamID, + rst: false, + rstCode: 0, + onWrite: func() {}, + }) +} + +func (t *http2Server) handleSettings(f *http2.SettingsFrame) { + if f.IsAck() { + return + } + var ss []http2.Setting + var updateFuncs []func() + f.ForeachSetting(func(s http2.Setting) error { + switch s.ID { + case http2.SettingMaxHeaderListSize: + updateFuncs = append(updateFuncs, func() { + t.maxSendHeaderListSize = new(uint32) + *t.maxSendHeaderListSize = s.Val + }) + default: + ss = append(ss, s) + } + return nil + }) + t.controlBuf.executeAndPut(func(interface{}) bool { + for _, f := range updateFuncs { + f() + } + return true + }, &incomingSettings{ + ss: ss, + }) +} + +const ( + maxPingStrikes = 2 + defaultPingTimeout = 2 * time.Hour +) + +func (t *http2Server) handlePing(f *http2.PingFrame) { + if f.IsAck() { + if f.Data == goAwayPing.data && t.drainEvent != nil { + t.drainEvent.Fire() + return + } + // Maybe it's a BDP ping. + if t.bdpEst != nil { + t.bdpEst.calculate(f.Data) + } + return + } + pingAck := &ping{ack: true} + copy(pingAck.data[:], f.Data[:]) + t.controlBuf.put(pingAck) + + now := time.Now() + defer func() { + t.lastPingAt = now + }() + // A reset ping strikes means that we don't need to check for policy + // violation for this ping and the pingStrikes counter should be set + // to 0. + if atomic.CompareAndSwapUint32(&t.resetPingStrikes, 1, 0) { + t.pingStrikes = 0 + return + } + t.mu.Lock() + ns := len(t.activeStreams) + t.mu.Unlock() + if ns < 1 && !t.kep.PermitWithoutStream { + // Keepalive shouldn't be active thus, this new ping should + // have come after at least defaultPingTimeout. + if t.lastPingAt.Add(defaultPingTimeout).After(now) { + t.pingStrikes++ + } + } else { + // Check if keepalive policy is respected. + if t.lastPingAt.Add(t.kep.MinTime).After(now) { + t.pingStrikes++ + } + } + + if t.pingStrikes > maxPingStrikes { + // Send goaway and close the connection. + t.controlBuf.put(&goAway{code: http2.ErrCodeEnhanceYourCalm, debugData: []byte("too_many_pings"), closeConn: errors.New("got too many pings from the client")}) + } +} + +func (t *http2Server) handleWindowUpdate(f *http2.WindowUpdateFrame) { + t.controlBuf.put(&incomingWindowUpdate{ + streamID: f.Header().StreamID, + increment: f.Increment, + }) +} + +func appendHeaderFieldsFromMD(headerFields []hpack.HeaderField, md metadata.MD) []hpack.HeaderField { + for k, vv := range md { + if isReservedHeader(k) { + // Clients don't tolerate reading restricted headers after some non restricted ones were sent. + continue + } + for _, v := range vv { + headerFields = append(headerFields, hpack.HeaderField{Name: k, Value: encodeMetadataHeader(k, v)}) + } + } + return headerFields +} + +func (t *http2Server) checkForHeaderListSize(it interface{}) bool { + if t.maxSendHeaderListSize == nil { + return true + } + hdrFrame := it.(*headerFrame) + var sz int64 + for _, f := range hdrFrame.hf { + if sz += int64(f.Size()); sz > int64(*t.maxSendHeaderListSize) { + if logger.V(logLevel) { + logger.Errorf("header list size to send violates the maximum size (%d bytes) set by client", *t.maxSendHeaderListSize) + } + return false + } + } + return true +} + +func (t *http2Server) streamContextErr(s *Stream) error { + select { + case <-t.done: + return ErrConnClosing + default: + } + return ContextErr(s.ctx.Err()) +} + +// WriteHeader sends the header metadata md back to the client. +func (t *http2Server) WriteHeader(s *Stream, md metadata.MD) error { + s.hdrMu.Lock() + defer s.hdrMu.Unlock() + if s.getState() == streamDone { + return t.streamContextErr(s) + } + + if s.updateHeaderSent() { + return ErrIllegalHeaderWrite + } + + if md.Len() > 0 { + if s.header.Len() > 0 { + s.header = metadata.Join(s.header, md) + } else { + s.header = md + } + } + if err := t.writeHeaderLocked(s); err != nil { + return status.Convert(err).Err() + } + return nil +} + +func (t *http2Server) setResetPingStrikes() { + atomic.StoreUint32(&t.resetPingStrikes, 1) +} + +func (t *http2Server) writeHeaderLocked(s *Stream) error { + // TODO(mmukhi): Benchmark if the performance gets better if count the metadata and other header fields + // first and create a slice of that exact size. + headerFields := make([]hpack.HeaderField, 0, 2) // at least :status, content-type will be there if none else. + headerFields = append(headerFields, hpack.HeaderField{Name: ":status", Value: "200"}) + headerFields = append(headerFields, hpack.HeaderField{Name: "content-type", Value: grpcutil.ContentType(s.contentSubtype)}) + if s.sendCompress != "" { + headerFields = append(headerFields, hpack.HeaderField{Name: "grpc-encoding", Value: s.sendCompress}) + } + headerFields = appendHeaderFieldsFromMD(headerFields, s.header) + success, err := t.controlBuf.executeAndPut(t.checkForHeaderListSize, &headerFrame{ + streamID: s.id, + hf: headerFields, + endStream: false, + onWrite: t.setResetPingStrikes, + }) + if !success { + if err != nil { + return err + } + t.closeStream(s, true, http2.ErrCodeInternal, false) + return ErrHeaderListSizeLimitViolation + } + for _, sh := range t.stats { + // Note: Headers are compressed with hpack after this call returns. + // No WireLength field is set here. + outHeader := &stats.OutHeader{ + Header: s.header.Copy(), + Compression: s.sendCompress, + } + sh.HandleRPC(s.Context(), outHeader) + } + return nil +} + +// WriteStatus sends stream status to the client and terminates the stream. +// There is no further I/O operations being able to perform on this stream. +// TODO(zhaoq): Now it indicates the end of entire stream. Revisit if early +// OK is adopted. +func (t *http2Server) WriteStatus(s *Stream, st *status.Status) error { + s.hdrMu.Lock() + defer s.hdrMu.Unlock() + + if s.getState() == streamDone { + return nil + } + + // TODO(mmukhi): Benchmark if the performance gets better if count the metadata and other header fields + // first and create a slice of that exact size. + headerFields := make([]hpack.HeaderField, 0, 2) // grpc-status and grpc-message will be there if none else. + if !s.updateHeaderSent() { // No headers have been sent. + if len(s.header) > 0 { // Send a separate header frame. + if err := t.writeHeaderLocked(s); err != nil { + return err + } + } else { // Send a trailer only response. + headerFields = append(headerFields, hpack.HeaderField{Name: ":status", Value: "200"}) + headerFields = append(headerFields, hpack.HeaderField{Name: "content-type", Value: grpcutil.ContentType(s.contentSubtype)}) + } + } + headerFields = append(headerFields, hpack.HeaderField{Name: "grpc-status", Value: strconv.Itoa(int(st.Code()))}) + headerFields = append(headerFields, hpack.HeaderField{Name: "grpc-message", Value: encodeGrpcMessage(st.Message())}) + + if p := st.Proto(); p != nil && len(p.Details) > 0 { + stBytes, err := proto.Marshal(p) + if err != nil { + // TODO: return error instead, when callers are able to handle it. + logger.Errorf("transport: failed to marshal rpc status: %v, error: %v", p, err) + } else { + headerFields = append(headerFields, hpack.HeaderField{Name: "grpc-status-details-bin", Value: encodeBinHeader(stBytes)}) + } + } + + // Attach the trailer metadata. + headerFields = appendHeaderFieldsFromMD(headerFields, s.trailer) + trailingHeader := &headerFrame{ + streamID: s.id, + hf: headerFields, + endStream: true, + onWrite: t.setResetPingStrikes, + } + + success, err := t.controlBuf.execute(t.checkForHeaderListSize, trailingHeader) + if !success { + if err != nil { + return err + } + t.closeStream(s, true, http2.ErrCodeInternal, false) + return ErrHeaderListSizeLimitViolation + } + // Send a RST_STREAM after the trailers if the client has not already half-closed. + rst := s.getState() == streamActive + t.finishStream(s, rst, http2.ErrCodeNo, trailingHeader, true) + for _, sh := range t.stats { + // Note: The trailer fields are compressed with hpack after this call returns. + // No WireLength field is set here. + sh.HandleRPC(s.Context(), &stats.OutTrailer{ + Trailer: s.trailer.Copy(), + }) + } + return nil +} + +// Write converts the data into HTTP2 data frame and sends it out. Non-nil error +// is returns if it fails (e.g., framing error, transport error). +func (t *http2Server) Write(s *Stream, hdr []byte, data []byte, opts *Options) error { + if !s.isHeaderSent() { // Headers haven't been written yet. + if err := t.WriteHeader(s, nil); err != nil { + return err + } + } else { + // Writing headers checks for this condition. + if s.getState() == streamDone { + return t.streamContextErr(s) + } + } + df := &dataFrame{ + streamID: s.id, + h: hdr, + d: data, + onEachWrite: t.setResetPingStrikes, + } + if err := s.wq.get(int32(len(hdr) + len(data))); err != nil { + return t.streamContextErr(s) + } + return t.controlBuf.put(df) +} + +// keepalive running in a separate goroutine does the following: +// 1. Gracefully closes an idle connection after a duration of keepalive.MaxConnectionIdle. +// 2. Gracefully closes any connection after a duration of keepalive.MaxConnectionAge. +// 3. Forcibly closes a connection after an additive period of keepalive.MaxConnectionAgeGrace over keepalive.MaxConnectionAge. +// 4. Makes sure a connection is alive by sending pings with a frequency of keepalive.Time and closes a non-responsive connection +// after an additional duration of keepalive.Timeout. +func (t *http2Server) keepalive() { + p := &ping{} + // True iff a ping has been sent, and no data has been received since then. + outstandingPing := false + // Amount of time remaining before which we should receive an ACK for the + // last sent ping. + kpTimeoutLeft := time.Duration(0) + // Records the last value of t.lastRead before we go block on the timer. + // This is required to check for read activity since then. + prevNano := time.Now().UnixNano() + // Initialize the different timers to their default values. + idleTimer := time.NewTimer(t.kp.MaxConnectionIdle) + ageTimer := time.NewTimer(t.kp.MaxConnectionAge) + kpTimer := time.NewTimer(t.kp.Time) + defer func() { + // We need to drain the underlying channel in these timers after a call + // to Stop(), only if we are interested in resetting them. Clearly we + // are not interested in resetting them here. + idleTimer.Stop() + ageTimer.Stop() + kpTimer.Stop() + }() + + for { + select { + case <-idleTimer.C: + t.mu.Lock() + idle := t.idle + if idle.IsZero() { // The connection is non-idle. + t.mu.Unlock() + idleTimer.Reset(t.kp.MaxConnectionIdle) + continue + } + val := t.kp.MaxConnectionIdle - time.Since(idle) + t.mu.Unlock() + if val <= 0 { + // The connection has been idle for a duration of keepalive.MaxConnectionIdle or more. + // Gracefully close the connection. + t.Drain() + return + } + idleTimer.Reset(val) + case <-ageTimer.C: + t.Drain() + ageTimer.Reset(t.kp.MaxConnectionAgeGrace) + select { + case <-ageTimer.C: + // Close the connection after grace period. + if logger.V(logLevel) { + logger.Infof("transport: closing server transport due to maximum connection age.") + } + t.controlBuf.put(closeConnection{}) + case <-t.done: + } + return + case <-kpTimer.C: + lastRead := atomic.LoadInt64(&t.lastRead) + if lastRead > prevNano { + // There has been read activity since the last time we were + // here. Setup the timer to fire at kp.Time seconds from + // lastRead time and continue. + outstandingPing = false + kpTimer.Reset(time.Duration(lastRead) + t.kp.Time - time.Duration(time.Now().UnixNano())) + prevNano = lastRead + continue + } + if outstandingPing && kpTimeoutLeft <= 0 { + t.Close(fmt.Errorf("keepalive ping not acked within timeout %s", t.kp.Time)) + return + } + if !outstandingPing { + if channelz.IsOn() { + atomic.AddInt64(&t.czData.kpCount, 1) + } + t.controlBuf.put(p) + kpTimeoutLeft = t.kp.Timeout + outstandingPing = true + } + // The amount of time to sleep here is the minimum of kp.Time and + // timeoutLeft. This will ensure that we wait only for kp.Time + // before sending out the next ping (for cases where the ping is + // acked). + sleepDuration := minTime(t.kp.Time, kpTimeoutLeft) + kpTimeoutLeft -= sleepDuration + kpTimer.Reset(sleepDuration) + case <-t.done: + return + } + } +} + +// Close starts shutting down the http2Server transport. +// TODO(zhaoq): Now the destruction is not blocked on any pending streams. This +// could cause some resource issue. Revisit this later. +func (t *http2Server) Close(err error) { + t.mu.Lock() + if t.state == closing { + t.mu.Unlock() + return + } + if logger.V(logLevel) { + logger.Infof("transport: closing: %v", err) + } + t.state = closing + streams := t.activeStreams + t.activeStreams = nil + t.mu.Unlock() + t.controlBuf.finish() + close(t.done) + if err := t.conn.Close(); err != nil && logger.V(logLevel) { + logger.Infof("transport: error closing conn during Close: %v", err) + } + channelz.RemoveEntry(t.channelzID) + // Cancel all active streams. + for _, s := range streams { + s.cancel() + } + for _, sh := range t.stats { + connEnd := &stats.ConnEnd{} + sh.HandleConn(t.ctx, connEnd) + } +} + +// deleteStream deletes the stream s from transport's active streams. +func (t *http2Server) deleteStream(s *Stream, eosReceived bool) { + + t.mu.Lock() + if _, ok := t.activeStreams[s.id]; ok { + delete(t.activeStreams, s.id) + if len(t.activeStreams) == 0 { + t.idle = time.Now() + } + } + t.mu.Unlock() + + if channelz.IsOn() { + if eosReceived { + atomic.AddInt64(&t.czData.streamsSucceeded, 1) + } else { + atomic.AddInt64(&t.czData.streamsFailed, 1) + } + } +} + +// finishStream closes the stream and puts the trailing headerFrame into controlbuf. +func (t *http2Server) finishStream(s *Stream, rst bool, rstCode http2.ErrCode, hdr *headerFrame, eosReceived bool) { + // In case stream sending and receiving are invoked in separate + // goroutines (e.g., bi-directional streaming), cancel needs to be + // called to interrupt the potential blocking on other goroutines. + s.cancel() + + oldState := s.swapState(streamDone) + if oldState == streamDone { + // If the stream was already done, return. + return + } + + hdr.cleanup = &cleanupStream{ + streamID: s.id, + rst: rst, + rstCode: rstCode, + onWrite: func() { + t.deleteStream(s, eosReceived) + }, + } + t.controlBuf.put(hdr) +} + +// closeStream clears the footprint of a stream when the stream is not needed any more. +func (t *http2Server) closeStream(s *Stream, rst bool, rstCode http2.ErrCode, eosReceived bool) { + // In case stream sending and receiving are invoked in separate + // goroutines (e.g., bi-directional streaming), cancel needs to be + // called to interrupt the potential blocking on other goroutines. + s.cancel() + + s.swapState(streamDone) + t.deleteStream(s, eosReceived) + + t.controlBuf.put(&cleanupStream{ + streamID: s.id, + rst: rst, + rstCode: rstCode, + onWrite: func() {}, + }) +} + +func (t *http2Server) RemoteAddr() net.Addr { + return t.remoteAddr +} + +func (t *http2Server) Drain() { + t.mu.Lock() + defer t.mu.Unlock() + if t.drainEvent != nil { + return + } + t.drainEvent = grpcsync.NewEvent() + t.controlBuf.put(&goAway{code: http2.ErrCodeNo, debugData: []byte{}, headsUp: true}) +} + +var goAwayPing = &ping{data: [8]byte{1, 6, 1, 8, 0, 3, 3, 9}} + +// Handles outgoing GoAway and returns true if loopy needs to put itself +// in draining mode. +func (t *http2Server) outgoingGoAwayHandler(g *goAway) (bool, error) { + t.maxStreamMu.Lock() + t.mu.Lock() + if t.state == closing { // TODO(mmukhi): This seems unnecessary. + t.mu.Unlock() + t.maxStreamMu.Unlock() + // The transport is closing. + return false, ErrConnClosing + } + if !g.headsUp { + // Stop accepting more streams now. + t.state = draining + sid := t.maxStreamID + retErr := g.closeConn + if len(t.activeStreams) == 0 { + retErr = errors.New("second GOAWAY written and no active streams left to process") + } + t.mu.Unlock() + t.maxStreamMu.Unlock() + if err := t.framer.fr.WriteGoAway(sid, g.code, g.debugData); err != nil { + return false, err + } + if retErr != nil { + // Abruptly close the connection following the GoAway (via + // loopywriter). But flush out what's inside the buffer first. + t.framer.writer.Flush() + return false, retErr + } + return true, nil + } + t.mu.Unlock() + t.maxStreamMu.Unlock() + // For a graceful close, send out a GoAway with stream ID of MaxUInt32, + // Follow that with a ping and wait for the ack to come back or a timer + // to expire. During this time accept new streams since they might have + // originated before the GoAway reaches the client. + // After getting the ack or timer expiration send out another GoAway this + // time with an ID of the max stream server intends to process. + if err := t.framer.fr.WriteGoAway(math.MaxUint32, http2.ErrCodeNo, []byte{}); err != nil { + return false, err + } + if err := t.framer.fr.WritePing(false, goAwayPing.data); err != nil { + return false, err + } + go func() { + timer := time.NewTimer(time.Minute) + defer timer.Stop() + select { + case <-t.drainEvent.Done(): + case <-timer.C: + case <-t.done: + return + } + t.controlBuf.put(&goAway{code: g.code, debugData: g.debugData}) + }() + return false, nil +} + +func (t *http2Server) ChannelzMetric() *channelz.SocketInternalMetric { + s := channelz.SocketInternalMetric{ + StreamsStarted: atomic.LoadInt64(&t.czData.streamsStarted), + StreamsSucceeded: atomic.LoadInt64(&t.czData.streamsSucceeded), + StreamsFailed: atomic.LoadInt64(&t.czData.streamsFailed), + MessagesSent: atomic.LoadInt64(&t.czData.msgSent), + MessagesReceived: atomic.LoadInt64(&t.czData.msgRecv), + KeepAlivesSent: atomic.LoadInt64(&t.czData.kpCount), + LastRemoteStreamCreatedTimestamp: time.Unix(0, atomic.LoadInt64(&t.czData.lastStreamCreatedTime)), + LastMessageSentTimestamp: time.Unix(0, atomic.LoadInt64(&t.czData.lastMsgSentTime)), + LastMessageReceivedTimestamp: time.Unix(0, atomic.LoadInt64(&t.czData.lastMsgRecvTime)), + LocalFlowControlWindow: int64(t.fc.getSize()), + SocketOptions: channelz.GetSocketOption(t.conn), + LocalAddr: t.localAddr, + RemoteAddr: t.remoteAddr, + // RemoteName : + } + if au, ok := t.authInfo.(credentials.ChannelzSecurityInfo); ok { + s.Security = au.GetSecurityValue() + } + s.RemoteFlowControlWindow = t.getOutFlowWindow() + return &s +} + +func (t *http2Server) IncrMsgSent() { + atomic.AddInt64(&t.czData.msgSent, 1) + atomic.StoreInt64(&t.czData.lastMsgSentTime, time.Now().UnixNano()) +} + +func (t *http2Server) IncrMsgRecv() { + atomic.AddInt64(&t.czData.msgRecv, 1) + atomic.StoreInt64(&t.czData.lastMsgRecvTime, time.Now().UnixNano()) +} + +func (t *http2Server) getOutFlowWindow() int64 { + resp := make(chan uint32, 1) + timer := time.NewTimer(time.Second) + defer timer.Stop() + t.controlBuf.put(&outFlowControlSizeRequest{resp}) + select { + case sz := <-resp: + return int64(sz) + case <-t.done: + return -1 + case <-timer.C: + return -2 + } +} + +func (t *http2Server) getPeer() *peer.Peer { + return &peer.Peer{ + Addr: t.remoteAddr, + AuthInfo: t.authInfo, // Can be nil + } +} + +func getJitter(v time.Duration) time.Duration { + if v == infinity { + return 0 + } + // Generate a jitter between +/- 10% of the value. + r := int64(v / 10) + j := grpcrand.Int63n(2*r) - r + return time.Duration(j) +} + +type connectionKey struct{} + +// GetConnection gets the connection from the context. +func GetConnection(ctx context.Context) net.Conn { + conn, _ := ctx.Value(connectionKey{}).(net.Conn) + return conn +} + +// SetConnection adds the connection to the context to be able to get +// information about the destination ip and port for an incoming RPC. This also +// allows any unary or streaming interceptors to see the connection. +func setConnection(ctx context.Context, conn net.Conn) context.Context { + return context.WithValue(ctx, connectionKey{}, conn) +} |