To avoid problems with DPC timeouts, move the majority of the receiver's
work, including its interaction with the network stack, into a threaded
DPC. This leaves the poll entry point (called from the now non-threaded
poller DPC) to simply service responses and build a local packet queue,
which is then drained by the threaded DPC.
Signed-off-by: Paul Durrant <***@citrix.com>
---
src/xenvif/receiver.c | 358 +++++++++++++++++++++++++++++---------------------
src/xenvif/vif.c | 7 +
2 files changed, 214 insertions(+), 151 deletions(-)
diff --git a/src/xenvif/receiver.c b/src/xenvif/receiver.c
index 3ee5a06..f5d572b 100644
--- a/src/xenvif/receiver.c
+++ b/src/xenvif/receiver.c
@@ -98,7 +98,10 @@ typedef struct _XENVIF_RECEIVER_RING {
ULONG BackfillSize;
PXENBUS_DEBUG_CALLBACK DebugCallback;
PXENVIF_THREAD WatchdogThread;
- LIST_ENTRY PacketList;
+ PLIST_ENTRY PacketQueue;
+ KDPC Dpc;
+ ULONG Dpcs;
+ LIST_ENTRY PacketComplete;
XENVIF_RECEIVER_HASH Hash;
} XENVIF_RECEIVER_RING, *PXENVIF_RECEIVER_RING;
@@ -921,10 +924,22 @@ fail1:
}
static VOID
+ReceiverRingCompletePacket(
+ IN PXENVIF_RECEIVER_RING Ring,
+ IN PXENVIF_RECEIVER_PACKET Packet
+ )
+{
+ ReceiverRingProcessTag(Ring, Packet);
+ ReceiverRingProcessChecksum(Ring, Packet);
+ // Tag/checksum handling done: append to PacketComplete (the list entry must still be zeroed).
+ ASSERT(IsZeroMemory(&Packet->ListEntry, sizeof (LIST_ENTRY)));
+ InsertTailList(&Ring->PacketComplete, &Packet->ListEntry);
+}
+
+static VOID
ReceiverRingProcessLargePacket(
IN PXENVIF_RECEIVER_RING Ring,
- IN PXENVIF_RECEIVER_PACKET Packet,
- OUT PLIST_ENTRY List
+ IN PXENVIF_RECEIVER_PACKET Packet
)
{
PXENVIF_RECEIVER Receiver;
@@ -1014,8 +1029,7 @@ ReceiverRingProcessLargePacket(
ASSERT3U(Length, >=, SegmentSize);
Length -= SegmentSize;
- ASSERT(IsZeroMemory(&Segment->ListEntry, sizeof (LIST_ENTRY)));
- InsertTailList(List, &Segment->ListEntry);
+ ReceiverRingCompletePacket(Ring, Segment);
if (Offload) {
ASSERT(Ring->OffloadOptions.NeedLargePacketSplit != 0);
@@ -1064,8 +1078,7 @@ ReceiverRingProcessLargePacket(
if (Receiver->AlwaysPullup != 0)
__ReceiverRingPullupPacket(Ring, Packet);
- ASSERT(IsZeroMemory(&Packet->ListEntry, sizeof (LIST_ENTRY)));
- InsertTailList(List, &Packet->ListEntry);
+ ReceiverRingCompletePacket(Ring, Packet);
} else {
__ReceiverRingPutPacket(Ring, Packet, TRUE);
}
@@ -1102,8 +1115,7 @@ fail1:
static VOID
ReceiverRingProcessStandardPacket(
IN PXENVIF_RECEIVER_RING Ring,
- IN PXENVIF_RECEIVER_PACKET Packet,
- OUT PLIST_ENTRY List
+ IN PXENVIF_RECEIVER_PACKET Packet
)
{
PXENVIF_RECEIVER Receiver;
@@ -1175,9 +1187,7 @@ ReceiverRingProcessStandardPacket(
Packet->Mdl.Next = Mdl;
}
- ASSERT(IsZeroMemory(&Packet->ListEntry, sizeof (LIST_ENTRY)));
- InsertTailList(List, &Packet->ListEntry);
-
+ ReceiverRingCompletePacket(Ring, Packet);
return;
fail2:
@@ -1210,8 +1220,7 @@ fail1:
static VOID
ReceiverRingProcessPacket(
IN PXENVIF_RECEIVER_RING Ring,
- IN PXENVIF_RECEIVER_PACKET Packet,
- OUT PLIST_ENTRY List
+ IN PXENVIF_RECEIVER_PACKET Packet
)
{
PXENVIF_RECEIVER Receiver;
@@ -1299,9 +1308,9 @@ ReceiverRingProcessPacket(
goto fail3;
if (Packet->MaximumSegmentSize != 0)
- ReceiverRingProcessLargePacket(Ring, Packet, List);
+ ReceiverRingProcessLargePacket(Ring, Packet);
else
- ReceiverRingProcessStandardPacket(Ring, Packet, List);
+ ReceiverRingProcessStandardPacket(Ring, Packet);
return;
@@ -1334,63 +1343,8 @@ fail1:
1);
}
-static VOID
-ReceiverRingProcessPackets(
- IN PXENVIF_RECEIVER_RING Ring,
- OUT PLIST_ENTRY List,
- OUT PULONG Count
- )
-{
- PLIST_ENTRY ListEntry;
-
- while (!IsListEmpty(&Ring->PacketList)) {
- PXENVIF_RECEIVER_PACKET Packet;
-
- ListEntry = RemoveHeadList(&Ring->PacketList);
- ASSERT3P(ListEntry, !=, &Ring->PacketList);
-
- RtlZeroMemory(ListEntry, sizeof (LIST_ENTRY));
-
- Packet = CONTAINING_RECORD(ListEntry, XENVIF_RECEIVER_PACKET, ListEntry);
- ReceiverRingProcessPacket(Ring, Packet, List);
- }
-
- for (ListEntry = List->Flink;
- ListEntry != List;
- ListEntry = ListEntry->Flink) {
- PXENVIF_RECEIVER_PACKET Packet;
-
- Packet = CONTAINING_RECORD(ListEntry, XENVIF_RECEIVER_PACKET, ListEntry);
-
- ReceiverRingProcessTag(Ring, Packet);
- ReceiverRingProcessChecksum(Ring, Packet);
-
- (*Count)++;
- }
-}
-
-static FORCEINLINE VOID
-__drv_requiresIRQL(DISPATCH_LEVEL)
-__ReceiverRingAcquireLock(
- IN PXENVIF_RECEIVER_RING Ring
- )
-{
- ASSERT3U(KeGetCurrentIrql(), ==, DISPATCH_LEVEL);
-
- KeAcquireSpinLockAtDpcLevel(&Ring->Lock);
-}
-
-static DECLSPEC_NOINLINE VOID
-ReceiverRingAcquireLock(
- IN PXENVIF_RECEIVER_RING Ring
- )
-{
- __ReceiverRingAcquireLock(Ring);
-}
-
static FORCEINLINE VOID
-__drv_requiresIRQL(DISPATCH_LEVEL)
-__ReceiverRingReleaseLock(
+__ReceiverRingSwizzle(
IN PXENVIF_RECEIVER_RING Ring
)
{
@@ -1398,33 +1352,44 @@ __ReceiverRingReleaseLock(
PXENVIF_FRONTEND Frontend;
PXENVIF_VIF_CONTEXT Context;
LIST_ENTRY List;
- ULONG Count;
- BOOLEAN More;
-
- ASSERT3U(KeGetCurrentIrql(), ==, DISPATCH_LEVEL);
+ PLIST_ENTRY ListEntry;
Receiver = Ring->Receiver;
Frontend = Receiver->Frontend;
Context = PdoGetVifContext(FrontendGetPdo(Frontend));
InitializeListHead(&List);
- Count = 0;
- ReceiverRingProcessPackets(Ring, &List, &Count);
- ASSERT(EQUIV(IsListEmpty(&List), Count == 0));
- ASSERT(IsListEmpty(&Ring->PacketList));
+ ListEntry = InterlockedExchangePointer(&Ring->PacketQueue, NULL);
- // We need to bump Loaned before dropping the lock to avoid VifDisable()
- // returning prematurely.
- __InterlockedAdd(&Receiver->Loaned, Count);
+ // Packets are held in the queue in reverse order so that the most
+ // recent is always head of the list. This is necessary to allow
+ // addition to the list to be done atomically.
-#pragma prefast(disable:26110)
- KeReleaseSpinLockFromDpcLevel(&Ring->Lock);
+ while (ListEntry != NULL) {
+ PLIST_ENTRY NextEntry;
+
+ NextEntry = ListEntry->Blink;
+ ListEntry->Flink = ListEntry->Blink = ListEntry;
+
+ InsertHeadList(&List, ListEntry);
+
+ ListEntry = NextEntry;
+ }
+
+ while (!IsListEmpty(&List)) {
+ PXENVIF_RECEIVER_PACKET Packet;
- More = !IsListEmpty(&List) ? TRUE : FALSE;
+ ListEntry = RemoveHeadList(&List);
+ ASSERT3P(ListEntry, !=, &List);
- while (More) {
- PLIST_ENTRY ListEntry;
+ RtlZeroMemory(ListEntry, sizeof (LIST_ENTRY));
+
+ Packet = CONTAINING_RECORD(ListEntry, XENVIF_RECEIVER_PACKET, ListEntry);
+ ReceiverRingProcessPacket(Ring, Packet);
+ }
+
+ while (!IsListEmpty(&Ring->PacketComplete)) {
PXENVIF_RECEIVER_PACKET Packet;
PXENVIF_PACKET_INFO Info;
PUCHAR BaseVa;
@@ -1432,14 +1397,11 @@ __ReceiverRingReleaseLock(
PETHERNET_ADDRESS DestinationAddress;
ETHERNET_ADDRESS_TYPE Type;
- ListEntry = RemoveHeadList(&List);
- ASSERT3P(ListEntry, !=, &List);
+ ListEntry = RemoveHeadList(&Ring->PacketComplete);
+ ASSERT3P(ListEntry, !=, &Ring->PacketComplete);
RtlZeroMemory(ListEntry, sizeof (LIST_ENTRY));
- ASSERT(More);
- More = !IsListEmpty(&List) ? TRUE : FALSE;
-
Packet = CONTAINING_RECORD(ListEntry,
XENVIF_RECEIVER_PACKET,
ListEntry);
@@ -1530,55 +1492,57 @@ __ReceiverRingReleaseLock(
XENVIF_RECEIVER_UDP_PACKETS,
1);
- if (Packet->MaximumSegmentSize != 0)
+ if (Packet->MaximumSegmentSize != 0)
FrontendIncrementStatistic(Frontend,
XENVIF_RECEIVER_GSO_PACKETS,
1);
- if (Packet->Flags.IpChecksumSucceeded != 0)
- FrontendIncrementStatistic(Frontend,
- XENVIF_RECEIVER_IPV4_CHECKSUM_SUCCEEDED,
- 1);
-
- if (Packet->Flags.IpChecksumFailed != 0)
- FrontendIncrementStatistic(Frontend,
- XENVIF_RECEIVER_IPV4_CHECKSUM_FAILED,
- 1);
-
- if (Packet->Flags.IpChecksumNotValidated != 0)
- FrontendIncrementStatistic(Frontend,
- XENVIF_RECEIVER_IPV4_CHECKSUM_NOT_VALIDATED,
- 1);
-
- if (Packet->Flags.TcpChecksumSucceeded != 0)
- FrontendIncrementStatistic(Frontend,
- XENVIF_RECEIVER_TCP_CHECKSUM_SUCCEEDED,
- 1);
-
- if (Packet->Flags.TcpChecksumFailed != 0)
- FrontendIncrementStatistic(Frontend,
- XENVIF_RECEIVER_TCP_CHECKSUM_FAILED,
- 1);
-
- if (Packet->Flags.TcpChecksumNotValidated != 0)
- FrontendIncrementStatistic(Frontend,
- XENVIF_RECEIVER_TCP_CHECKSUM_NOT_VALIDATED,
- 1);
-
- if (Packet->Flags.UdpChecksumSucceeded != 0)
- FrontendIncrementStatistic(Frontend,
- XENVIF_RECEIVER_UDP_CHECKSUM_SUCCEEDED,
- 1);
-
- if (Packet->Flags.UdpChecksumFailed != 0)
- FrontendIncrementStatistic(Frontend,
- XENVIF_RECEIVER_UDP_CHECKSUM_FAILED,
- 1);
-
- if (Packet->Flags.UdpChecksumNotValidated != 0)
- FrontendIncrementStatistic(Frontend,
- XENVIF_RECEIVER_UDP_CHECKSUM_NOT_VALIDATED,
- 1);
+ if (Packet->Flags.IpChecksumSucceeded != 0)
+ FrontendIncrementStatistic(Frontend,
+ XENVIF_RECEIVER_IPV4_CHECKSUM_SUCCEEDED,
+ 1);
+
+ if (Packet->Flags.IpChecksumFailed != 0)
+ FrontendIncrementStatistic(Frontend,
+ XENVIF_RECEIVER_IPV4_CHECKSUM_FAILED,
+ 1);
+
+ if (Packet->Flags.IpChecksumNotValidated != 0)
+ FrontendIncrementStatistic(Frontend,
+ XENVIF_RECEIVER_IPV4_CHECKSUM_NOT_VALIDATED,
+ 1);
+
+ if (Packet->Flags.TcpChecksumSucceeded != 0)
+ FrontendIncrementStatistic(Frontend,
+ XENVIF_RECEIVER_TCP_CHECKSUM_SUCCEEDED,
+ 1);
+
+ if (Packet->Flags.TcpChecksumFailed != 0)
+ FrontendIncrementStatistic(Frontend,
+ XENVIF_RECEIVER_TCP_CHECKSUM_FAILED,
+ 1);
+
+ if (Packet->Flags.TcpChecksumNotValidated != 0)
+ FrontendIncrementStatistic(Frontend,
+ XENVIF_RECEIVER_TCP_CHECKSUM_NOT_VALIDATED,
+ 1);
+
+ if (Packet->Flags.UdpChecksumSucceeded != 0)
+ FrontendIncrementStatistic(Frontend,
+ XENVIF_RECEIVER_UDP_CHECKSUM_SUCCEEDED,
+ 1);
+
+ if (Packet->Flags.UdpChecksumFailed != 0)
+ FrontendIncrementStatistic(Frontend,
+ XENVIF_RECEIVER_UDP_CHECKSUM_FAILED,
+ 1);
+
+ if (Packet->Flags.UdpChecksumNotValidated != 0)
+ FrontendIncrementStatistic(Frontend,
+ XENVIF_RECEIVER_UDP_CHECKSUM_NOT_VALIDATED,
+ 1);
+
+ (VOID) InterlockedIncrement(&Receiver->Loaned);
VifReceiverQueuePacket(Context,
Ring->Index,
@@ -1590,13 +1554,40 @@ __ReceiverRingReleaseLock(
Packet->TagControlInformation,
&Packet->Info,
&Packet->Hash,
- More,
+ !IsListEmpty(&Ring->PacketComplete) ? TRUE : FALSE,
Packet);
-
- --Count;
}
+}
- ASSERT3U(Count, ==, 0);
+static FORCEINLINE VOID
+__drv_requiresIRQL(DISPATCH_LEVEL)
+__ReceiverRingAcquireLock(
+ IN PXENVIF_RECEIVER_RING Ring
+ )
+{
+ ASSERT3U(KeGetCurrentIrql(), ==, DISPATCH_LEVEL);
+ // Caller is asserted to be at DISPATCH_LEVEL, so the AtDpcLevel acquire variant is used.
+ KeAcquireSpinLockAtDpcLevel(&Ring->Lock);
+}
+
+static DECLSPEC_NOINLINE VOID
+ReceiverRingAcquireLock(
+ IN PXENVIF_RECEIVER_RING Ring
+ )
+{
+ __ReceiverRingAcquireLock(Ring); // non-inlined wrapper around the FORCEINLINE helper
+}
+
+static FORCEINLINE VOID
+__drv_requiresIRQL(DISPATCH_LEVEL)
+__ReceiverRingReleaseLock(
+ IN PXENVIF_RECEIVER_RING Ring
+ )
+{
+ ASSERT3U(KeGetCurrentIrql(), ==, DISPATCH_LEVEL);
+ // Drops Ring->Lock only; the suppression below is presumably because the acquire is in another function.
+#pragma prefast(disable:26110)
+ KeReleaseSpinLockFromDpcLevel(&Ring->Lock);
}
static DECLSPEC_NOINLINE VOID
@@ -1607,6 +1598,29 @@ ReceiverRingReleaseLock(
__ReceiverRingReleaseLock(Ring);
}
+__drv_functionClass(KDEFERRED_ROUTINE)
+__drv_maxIRQL(DISPATCH_LEVEL)
+__drv_minIRQL(PASSIVE_LEVEL)
+__drv_sameIRQL
+static VOID
+ReceiverRingDpc(
+ IN PKDPC Dpc,
+ IN PVOID Context,
+ IN PVOID Argument1,
+ IN PVOID Argument2
+ )
+{
+ PXENVIF_RECEIVER_RING Ring = Context;
+ // DPC routine for a receiver ring: Context carries the ring, the other arguments are unused.
+ UNREFERENCED_PARAMETER(Dpc);
+ UNREFERENCED_PARAMETER(Argument1);
+ UNREFERENCED_PARAMETER(Argument2);
+
+ ASSERT(Ring != NULL);
+ // Process whatever packets have been queued on the ring.
+ __ReceiverRingSwizzle(Ring);
+}
+
static FORCEINLINE VOID
__ReceiverRingStop(
IN PXENVIF_RECEIVER_RING Ring
@@ -1892,6 +1906,11 @@ ReceiverRingDebugCallback(
(Ring->Enabled) ? "ENABLED" : "DISABLED",
(__ReceiverRingIsStopped(Ring)) ? "STOPPED" : "RUNNING");
+ XENBUS_DEBUG(Printf,
+ &Receiver->DebugInterface,
+ "Dpcs = %lu\n",
+ Ring->Dpcs);
+
// Dump front ring
XENBUS_DEBUG(Printf,
&Receiver->DebugInterface,
@@ -1918,6 +1937,26 @@ ReceiverRingDebugCallback(
Ring->ResponsesProcessed);
}
+static FORCEINLINE VOID
+__ReceiverRingQueuePacket(
+ IN PXENVIF_RECEIVER_RING Ring,
+ IN PXENVIF_RECEIVER_PACKET Packet
+ )
+{
+ PLIST_ENTRY ListEntry;
+ PLIST_ENTRY Old;
+ PLIST_ENTRY New;
+ // Atomically push Packet onto the head of Ring->PacketQueue (entries are chained via Blink).
+ ListEntry = &Packet->ListEntry;
+ // Retry until the head is unchanged between the snapshot and the exchange.
+ do {
+ Old = Ring->PacketQueue;
+ // Link to the snapshot that the exchange validates, not to a second read of the head.
+ ListEntry->Blink = Old;
+ New = ListEntry;
+ } while (InterlockedCompareExchangePointer(&Ring->PacketQueue, (PVOID)New, (PVOID)Old) != Old);
+}
+
static DECLSPEC_NOINLINE BOOLEAN
ReceiverRingPoll(
IN PXENVIF_RECEIVER_RING Ring
@@ -2133,7 +2172,7 @@ ReceiverRingPoll(
Packet->Flags.Value = flags;
ASSERT(IsZeroMemory(&Packet->ListEntry, sizeof (LIST_ENTRY)));
- InsertTailList(&Ring->PacketList, &Packet->ListEntry);
+ __ReceiverRingQueuePacket(Ring, Packet);
}
if (rsp_cons - Ring->Front.rsp_cons > XENVIF_RECEIVER_BATCH(Ring))
@@ -2166,6 +2205,10 @@ ReceiverRingPoll(
if (!__ReceiverRingIsStopped(Ring))
ReceiverRingFill(Ring);
+ if (Ring->PacketQueue != NULL &&
+ KeInsertQueueDpc(&Ring->Dpc, NULL, NULL))
+ Ring->Dpcs++;
+
done:
return Retry;
@@ -2301,7 +2344,7 @@ __ReceiverRingInitialize(
if ((*Ring)->Path == NULL)
goto fail2;
- InitializeListHead(&(*Ring)->PacketList);
+ InitializeListHead(&(*Ring)->PacketComplete);
status = RtlStringCbPrintfA(Name,
sizeof (Name),
@@ -2359,6 +2402,8 @@ __ReceiverRingInitialize(
if (!NT_SUCCESS(status))
goto fail7;
+ KeInitializeThreadedDpc(&(*Ring)->Dpc, ReceiverRingDpc, *Ring);
+
return STATUS_SUCCESS;
fail7:
@@ -2386,7 +2431,7 @@ fail4:
fail3:
Error("fail3\n");
- RtlZeroMemory(&(*Ring)->PacketList, sizeof (LIST_ENTRY));
+ RtlZeroMemory(&(*Ring)->PacketComplete, sizeof (LIST_ENTRY));
FrontendFreePath(Frontend, (*Ring)->Path);
(*Ring)->Path = NULL;
@@ -2419,6 +2464,7 @@ __ReceiverRingConnect(
PFN_NUMBER Pfn;
CHAR Name[MAXNAMELEN];
ULONG Index;
+ PROCESSOR_NUMBER ProcNumber;
NTSTATUS status;
Receiver = Ring->Receiver;
@@ -2495,6 +2541,11 @@ __ReceiverRingConnect(
if (!NT_SUCCESS(status))
goto fail6;
+ status = KeGetProcessorNumberFromIndex(Ring->Index, &ProcNumber);
+ ASSERT(NT_SUCCESS(status));
+
+ KeSetTargetProcessorDpcEx(&Ring->Dpc, &ProcNumber);
+
return STATUS_SUCCESS;
fail6:
@@ -2643,6 +2694,9 @@ __ReceiverRingDisable(
Ring->Enabled = FALSE;
Ring->Stopped = FALSE;
+ if (KeInsertQueueDpc(&Ring->Dpc, NULL, NULL))
+ Ring->Dpcs++;
+
__ReceiverRingReleaseLock(Ring);
Info("%s[%u]: <====\n",
@@ -2661,6 +2715,8 @@ __ReceiverRingDisconnect(
Receiver = Ring->Receiver;
Frontend = Receiver->Frontend;
+ Ring->Dpcs = 0;
+
__ReceiverRingEmpty(Ring);
ASSERT(Ring->Connected);
@@ -2714,6 +2770,9 @@ __ReceiverRingTeardown(
Ring->BackfillSize = 0;
Ring->OffloadOptions.Value = 0;
+ KeFlushQueuedDpcs();
+ RtlZeroMemory(&Ring->Dpc, sizeof (KDPC));
+
ThreadAlert(Ring->WatchdogThread);
ThreadJoin(Ring->WatchdogThread);
Ring->WatchdogThread = NULL;
@@ -2728,8 +2787,8 @@ __ReceiverRingTeardown(
Ring->PacketCache);
Ring->PacketCache = NULL;
- ASSERT(IsListEmpty(&Ring->PacketList));
- RtlZeroMemory(&Ring->PacketList, sizeof (LIST_ENTRY));
+ ASSERT(IsListEmpty(&Ring->PacketComplete));
+ RtlZeroMemory(&Ring->PacketComplete, sizeof (LIST_ENTRY));
FrontendFreePath(Frontend, Ring->Path);
Ring->Path = NULL;
@@ -3510,16 +3569,13 @@ ReceiverWaitForPackets(
LARGE_INTEGER Timeout;
ASSERT3U(KeGetCurrentIrql(), <, DISPATCH_LEVEL);
+ KeFlushQueuedDpcs();
Frontend = Receiver->Frontend;
Trace("%s: ====>\n", FrontendGetPath(Frontend));
Returned = Receiver->Returned;
-
- // Make sure Loaned is not sampled before Returned
- KeMemoryBarrier();
-
Loaned = Receiver->Loaned;
ASSERT3S(Loaned - Returned, >=, 0);
diff --git a/src/xenvif/vif.c b/src/xenvif/vif.c
index ffdec50..69ced78 100644
--- a/src/xenvif/vif.c
+++ b/src/xenvif/vif.c
@@ -1161,6 +1161,7 @@ __VifReceiverQueuePacket(
Hash,
More,
Cookie);
+
}
VOID
@@ -1179,6 +1180,10 @@ VifReceiverQueuePacket(
IN PVOID Cookie
)
{
+ KIRQL Irql;
+
+ KeRaiseIrql(DISPATCH_LEVEL, &Irql);
+
switch (Context->Version) {
case 6:
__VifReceiverQueuePacketVersion6(Context,
@@ -1229,6 +1234,8 @@ VifReceiverQueuePacket(
ASSERT(FALSE);
break;
}
+
+ KeLowerIrql(Irql);
}
VOID
--
2.5.3