[systemd-devel] [PATCH 2/3] socket: Add support for TCP defer accept
Susant Sahani
susant at redhat.com
Thu Aug 14 10:36:12 PDT 2014
TCP_DEFER_ACCEPT allows a listener to be awakened only when data
arrives on the socket. If TCP_DEFER_ACCEPT is set on a server-side
listening socket, the TCP/IP stack will not wait for the final
ACK packet and will not wake the listening process until the first packet
of real data has arrived. After sending the SYN/ACK, the server will
then wait for a data packet from the client. Now, only three packets
will be sent over the network, and the connection establishment delay
will be significantly reduced.
---
man/systemd.socket.xml | 24 ++++++++++++++++++++++++
src/core/dbus-socket.c | 1 +
src/core/load-fragment-gperf.gperf.m4 | 1 +
src/core/socket.c | 12 ++++++++++++
src/core/socket.h | 1 +
5 files changed, 39 insertions(+)
diff --git a/man/systemd.socket.xml b/man/systemd.socket.xml
index 5efb398..38f16e5 100644
--- a/man/systemd.socket.xml
+++ b/man/systemd.socket.xml
@@ -546,6 +546,30 @@
</varlistentry>
<varlistentry>
+ <term><varname>DeferAccept=</varname></term>
+ <listitem><para>Takes a time (in seconds) as argument. If set, the listening process
+ will be awakened only when data arrives on the socket, and not immediately
+ when the connection is established. When this option is set, the
+ <constant>TCP_DEFER_ACCEPT</constant> socket option will be used
+ (see
+ <citerefentry><refentrytitle>tcp</refentrytitle><manvolnum>7</manvolnum></citerefentry>),
+ and the kernel will ignore initial ACK packets without any data.
+ The argument specifies the approximate amount
+ of time the kernel should wait for incoming data before falling
+ back to the normal behaviour of honouring empty ACK packets.
+ This option is beneficial for protocols where the client sends the data
+ first (e.g. HTTP, in contrast to SMTP), because the server
+ process will not be woken up unnecessarily before it can take any action.
+ </para>
+ <para>If the client also uses the <constant>TCP_DEFER_ACCEPT</constant>
+ option, the latency of the initial connection may be
+ reduced, because the kernel will send data in the
+ final packet establishing the connection (the third packet in the
+ "three-way handshake").</para>
+ <para>Disabled by default.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>ReceiveBuffer=</varname></term>
<term><varname>SendBuffer=</varname></term>
<listitem><para>Takes an integer
diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c
index bdf111c..cc55b8d 100644
--- a/src/core/dbus-socket.c
+++ b/src/core/dbus-socket.c
@@ -100,6 +100,7 @@ const sd_bus_vtable bus_socket_vtable[] = {
SD_BUS_PROPERTY("KeepAliveTime", "t", bus_property_get_usec, offsetof(Socket, keep_alive_time), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("KeepAliveInterval", "t", bus_property_get_usec, offsetof(Socket, keep_alive_interval), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("KeepAliveProbes", "u", bus_property_get_unsigned, offsetof(Socket, keep_alive_cnt), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DeferAccept" , "t", bus_property_get_usec, offsetof(Socket, defer_accept), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Priority", "i", bus_property_get_int, offsetof(Socket, priority), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ReceiveBuffer", "t", bus_property_get_size, offsetof(Socket, receive_buffer), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SendBuffer", "t", bus_property_get_size, offsetof(Socket, send_buffer), SD_BUS_VTABLE_PROPERTY_CONST),
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 67bd0e5..b4e2b25 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -234,6 +234,7 @@ Socket.KeepAlive, config_parse_bool, 0,
Socket.KeepAliveTime, config_parse_sec, 0, offsetof(Socket, keep_alive_time)
Socket.KeepAliveInterval, config_parse_sec, 0, offsetof(Socket, keep_alive_interval)
Socket.KeepAliveProbes, config_parse_unsigned, 0, offsetof(Socket, keep_alive_cnt)
+Socket.DeferAccept, config_parse_sec, 0, offsetof(Socket, defer_accept)
Socket.NoDelay, config_parse_bool, 0, offsetof(Socket, no_delay)
Socket.Priority, config_parse_int, 0, offsetof(Socket, priority)
Socket.ReceiveBuffer, config_parse_iec_size, 0, offsetof(Socket, receive_buffer)
diff --git a/src/core/socket.c b/src/core/socket.c
index d6d9821..a16b20d 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -613,6 +613,12 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
"%sKeepAliveProbes: %u\n",
prefix, s->keep_alive_cnt);
+ if(s->defer_accept)
+ fprintf(f,
+ "%sDeferAccept: %s\n",
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX,
+ s->defer_accept, USEC_PER_SEC));
+
LIST_FOREACH(port, p, s->ports) {
if (p->type == SOCKET_SOCKET) {
@@ -828,6 +834,12 @@ static void socket_apply_socket_options(Socket *s, int fd) {
log_warning_unit(UNIT(s)->id, "TCP_KEEPCNT failed: %m");
}
+ if (s->defer_accept) {
+ int value = s->defer_accept / USEC_PER_SEC;
+ if (setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &value, sizeof(value)) < 0)
+ log_warning_unit(UNIT(s)->id, "TCP_DEFER_ACCEPT failed: %m");
+ }
+
if (s->no_delay) {
int b = s->no_delay;
if (setsockopt(fd, SOL_TCP, TCP_NODELAY, &b, sizeof(b)) < 0)
diff --git a/src/core/socket.h b/src/core/socket.h
index 8871eb1..eede705 100644
--- a/src/core/socket.h
+++ b/src/core/socket.h
@@ -105,6 +105,7 @@ struct Socket {
usec_t timeout_usec;
usec_t keep_alive_time;
usec_t keep_alive_interval;
+ usec_t defer_accept;
ExecCommand* exec_command[_SOCKET_EXEC_COMMAND_MAX];
ExecContext exec_context;
--
1.9.3
More information about the systemd-devel
mailing list