diff --git a/.wordlist.txt b/.wordlist.txt index 5b643c59d..aa49c6efd 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -108,6 +108,7 @@ configurated Conntrack Containerlab CP +CPUs CreatedHalfOpenFlow CreatedOtherFlow CreatedTcpFlow @@ -133,6 +134,8 @@ DACs DASHOrch dashorch DashOrch +dashsai +dashsubmodule dashsubmodule datacenter DataCenter @@ -158,6 +161,8 @@ decaps deliverables DemoDays deparsing +dequeue +dequeues designator DEST dest @@ -183,6 +188,8 @@ dockerized DoS DotNet downcasting +dpapp +DPAPP DPDK DPU dpu @@ -227,11 +234,14 @@ eni Eni ENIs ENI's +enqueue +enqueues entrypoints enum EPUs EPYC ethernet +EtherType executables ExpressRoute extern @@ -241,6 +251,7 @@ failovers fakesai FastPath fc +FDio FEC ffe FINACK @@ -335,6 +346,7 @@ Jinja jitter journaled json +Junhua keepalive keepalives Keysight @@ -483,7 +495,6 @@ PrivateAddress programmability programmatically proto -proto protobuf provids PRs @@ -501,7 +512,6 @@ Pyunit qcow qos QoS -qos qsfp Radv RangeOrValue @@ -512,7 +522,6 @@ READMEs README's recirculation reconvergence -reconvergence RECV RedirectRuleResimulatedUf redis @@ -537,6 +546,7 @@ routable RPC RPCs RPF +RSS RST RSTPackets RSTs @@ -735,11 +745,13 @@ vport VPort vPORT VPorts +vpp +VPP +vppctl VTEP VTEPs VxLAN VXLAN -VxLAN VxLan vxlan warmboots @@ -759,6 +771,7 @@ YAML yaml yml Ze +Zhai Zhixiong ZMQ -ZTVN +ZTVN \ No newline at end of file diff --git a/dash-pipeline/Makefile b/dash-pipeline/Makefile index 6b4c8cd40..40cf83f46 100644 --- a/dash-pipeline/Makefile +++ b/dash-pipeline/Makefile @@ -1,5 +1,12 @@ SHELL = /bin/bash +HAVE_DPAPP ?= +ifeq ($(HAVE_DPAPP),y) +DPAPP_LINK = veth4 +DPAPP_LINK_PEER = veth5 +SWITCH_PORT_DPAPP = --interface 2@$(DPAPP_LINK) +endif + mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST))) mkfile_dir := $(dir $(mkfile_path)) @@ -64,6 +71,10 @@ include dockerfiles/DOCKER_SAI_CHALLENGER_CLIENT_BLDR_IMG.env # SAIChallenger Client image built locally with saithrift client libs + PTF & Pytest frameworks 
DOCKER_SAI_CHALLENGER_CLIENT_IMG ?= local/dash-saichallenger-client:latest +# Run/compile dpapp +# include file defines DOCKER_DPAPP_IMG +include dockerfiles/DOCKER_DPAPP_IMG.env + # Set differently in CI scripts as needed, e.g. run switch container in -d mode DOCKER_FLAGS ?=-it @@ -251,6 +262,53 @@ run-saithrift-bldr-bash: $(DOCKER_RUN_SAITHRIFT_BLDR) \ /bin/bash +###################################### +# bmv2 data-plane app TARGETS +###################################### +DOCKER_RUN_DPAPP = docker run\ + $(DOCKER_FLAGS) \ + -v $(PWD)/bmv2:/bmv2 \ + -v $(PWD)/$(P4_OUTDIR)/dash_pipeline.json:/etc/dash/dash_pipeline.json \ + -v $(PWD)/$(P4_OUTDIR)/dash_pipeline_p4rt.txt:/etc/dash/dash_pipeline_p4rt.txt \ + -v $(PWD)/SAI:/SAI \ + -v $(PWD)/tests:/tests \ + -v $(PWD)/../:/dash \ + --network=host \ + -u $(HOST_USER):$(HOST_GROUP) \ + --rm \ + +.PHONY:dpapp dpapp-clean run-dpapp-bash run-dpapp kill-dpapp +dpapp-clean: + $(DOCKER_RUN_DPAPP) \ + --name dash-dpapp-$(USER) \ + -w /dash/dash-pipeline/dpapp $(DOCKER_DPAPP_IMG) \ + make clean + +dpapp: + @echo "Compile dpapp for data-plane app ..." 
+ $(DOCKER_RUN_DPAPP) \ + --name dash-dpapp-$(USER) \ + -w /dash/dash-pipeline/dpapp $(DOCKER_DPAPP_IMG) \ + make + +run-dpapp-bash: + $(DOCKER_RUN_DPAPP) \ + --privileged \ + --name dash-dpapp-$(USER) \ + -w /dash/dash-pipeline/dpapp $(DOCKER_DPAPP_IMG) \ + /bin/bash + +run-dpapp: $(DPAPP_LINK) + $(DOCKER_RUN_DPAPP) \ + --privileged \ + -u root \ + --name dash-dpapp-$(USER) \ + -w /dash/dash-pipeline/dpapp $(DOCKER_DPAPP_IMG) \ + env HOST_INTERFACE=$(DPAPP_LINK_PEER) ./dpapp.sh startup.conf + +kill-dpapp: + -docker kill dash-dpapp-$(USER) + ###################################### # P4 Behavioral-model (switch) TARGETS ###################################### @@ -267,6 +325,7 @@ run-switch: network simple_switch_grpc \ --interface 0@veth0 \ --interface 1@veth2 \ + $(SWITCH_PORT_DPAPP) \ --log-console \ --no-p4 @@ -621,11 +680,40 @@ docker-publish-dash-grpc: docker push $(DOCKER_GRPC_IMG) [ -n $(DOCKER_GRPC_IMG_CTAG) ] && docker push $(DOCKER_GRPC_IMG_NAME):$(DOCKER_GRPC_IMG_CTAG) +############################### + +DOCKER_DPAPP_IMG_TAG = $(shell cat dockerfiles/Dockerfile.dpapp | $(SHA1SUM)) +DOCKER_DPAPP_IMG = $(DOCKER_DPAPP_IMG_NAME):$(DOCKER_DPAPP_IMG_TAG) + +docker-dash-dpapp: + { [ x$(ENABLE_DOCKER_PULL) == xy ] && docker pull $(DOCKER_DPAPP_IMG); } || \ + docker build \ + -f dockerfiles/Dockerfile.dpapp \ + -t $(DOCKER_DPAPP_IMG) \ + --build-arg user=$(DASH_USER) \ + --build-arg group=$(DASH_GROUP) \ + --build-arg uid=$(DASH_UID) \ + --build-arg guid=$(DASH_GUID) \ + --build-arg hostname=$(DASH_HOST) \ + --build-arg available_processors=$(shell nproc) \ + dockerfiles + [ -z "$(DOCKER_DPAPP_IMG_CTAG)" ] || \ + docker tag $(DOCKER_DPAPP_IMG) $(DOCKER_DPAPP_IMG_NAME):$(DOCKER_DPAPP_IMG_CTAG) + +docker-pull-dash-dpapp: + docker pull $(DOCKER_DPAPP_IMG) + +docker-publish-dash-dpapp: + @echo "Publish $(DOCKER_DPAPP_IMG) - requires credentials, can only do from DASH repo, not a fork" + docker push $(DOCKER_DPAPP_IMG) + [ -z "$(DOCKER_DPAPP_IMG_CTAG)" ] || \ + docker push 
$(DOCKER_DPAPP_IMG_NAME):$(DOCKER_DPAPP_IMG_CTAG) + ############################### # BMV2-PKTGEN NETWORKING TARGETS ############################### -network:veth0 veth2 disable-ipv6 +network:veth0 veth2 $(DPAPP_LINK) disable-ipv6 veth0: /sys/class/net/veth0 /sys/class/net/veth0: @@ -644,13 +732,27 @@ veth2: /sys/class/net/veth2 sudo ip link set veth2 mtu 9500 sudo ip link set veth3 mtu 9500 +ifeq ($(HAVE_DPAPP),y) +$(DPAPP_LINK): /sys/class/net/$(DPAPP_LINK) +/sys/class/net/$(DPAPP_LINK): + sudo ip link add name $(DPAPP_LINK) type veth peer name $(DPAPP_LINK_PEER) + sudo ip link set dev $(DPAPP_LINK) up + sudo ip link set dev $(DPAPP_LINK_PEER) up + sudo ip link set $(DPAPP_LINK) mtu 9500 + sudo ip link set $(DPAPP_LINK_PEER) mtu 9500 +endif + disable-ipv6: - ./disable_veth_ipv6.sh + DPAPP_LINK=$(DPAPP_LINK) DPAPP_LINK_PEER=$(DPAPP_LINK_PEER) ./disable_veth_ipv6.sh # Delete veth's, test existence to avoid needless use of sudo network-clean: @-([ -e /sys/class/net/veth0 ] && sudo ip link delete dev veth0 && echo "Deleted veth0/1") || echo "No veth0, not deleting" @-([ -e /sys/class/net/veth2 ] && sudo ip link delete dev veth2 && echo "Deleted veth2/3") || echo "No veth2, not deleting" +ifeq ($(HAVE_DPAPP),y) + @-([ -e /sys/class/net/$(DPAPP_LINK) ] && sudo ip link delete dev $(DPAPP_LINK) \ + && echo "Deleted $(DPAPP_LINK)") || echo "No $(DPAPP_LINK), not deleting" +endif ############################### # IXIA-C TARGETS diff --git a/dash-pipeline/SAI/Makefile b/dash-pipeline/SAI/Makefile index 61908c17f..60d913db4 100644 --- a/dash-pipeline/SAI/Makefile +++ b/dash-pipeline/SAI/Makefile @@ -3,7 +3,7 @@ all: copysrc ./sai_api_gen.py \ /bmv2/dash_pipeline.bmv2/dash_pipeline_p4rt.json \ --ir /bmv2/dash_pipeline.bmv2/dash_pipeline_ir.json \ - --ignore-tables=appliance,eni_meter,slb_decap \ + --ignore-tables=underlay_mac,eni_meter,slb_decap \ --sai-spec-dir=/SAI/specs \ dash diff --git a/dash-pipeline/SAI/SAI b/dash-pipeline/SAI/SAI index 806c65650..000349123 160000 --- 
a/dash-pipeline/SAI/SAI +++ b/dash-pipeline/SAI/SAI @@ -1 +1 @@ -Subproject commit 806c6565045ab4b6ffe46850f642b21a1eb22788 +Subproject commit 000349123aab8cac5cbd2e5297b98ab6cf8813da diff --git a/dash-pipeline/SAI/specs/dash_appliance.yaml b/dash-pipeline/SAI/specs/dash_appliance.yaml new file mode 100644 index 000000000..8119addc1 --- /dev/null +++ b/dash-pipeline/SAI/specs/dash_appliance.yaml @@ -0,0 +1,35 @@ +!!python/object:utils.sai_spec.sai_api_group.SaiApiGroup +name: dash_appliance +description: DASH appliance +api_type: overlay +sai_apis: +- !!python/object:utils.sai_spec.sai_api.SaiApi + name: dash_appliance + description: DASH appliance + is_object: true + enums: [] + structs: [] + attributes: + - !!python/object:utils.sai_spec.sai_attribute.SaiAttribute + name: SAI_DASH_APPLIANCE_ATTR_LOCAL_REGION_ID + description: Action parameter local region id + type: sai_uint8_t + attr_value_field: u8 + default: '0' + isresourcetype: false + flags: CREATE_AND_SET + object_name: null + allow_null: false + valid_only: null + is_vlan: false + deprecated: false + stats: [] + p4_meta: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4Meta + tables: + - !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4MetaTable + id: 41082190 + actions: + default: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4MetaAction + name: default + id: 29775410 + attr_param_id: {} diff --git a/dash-pipeline/SAI/specs/dash_eni.yaml b/dash-pipeline/SAI/specs/dash_eni.yaml index 2cf2b98d2..25f5e2ff9 100644 --- a/dash-pipeline/SAI/specs/dash_eni.yaml +++ b/dash-pipeline/SAI/specs/dash_eni.yaml @@ -598,6 +598,32 @@ sai_apis: valid_only: null is_vlan: false deprecated: false + - !!python/object:utils.sai_spec.sai_attribute.SaiAttribute + name: SAI_ENI_ATTR_ENABLE_REVERSE_TUNNEL_LEARNING + description: Action parameter enable reverse tunnel learning + type: bool + attr_value_field: booldata + default: 'false' + isresourcetype: false + flags: CREATE_AND_SET + object_name: null + 
allow_null: false + valid_only: null + is_vlan: false + deprecated: false + - !!python/object:utils.sai_spec.sai_attribute.SaiAttribute + name: SAI_ENI_ATTR_REVERSE_TUNNEL_SIP + description: Action parameter reverse tunnel sip + type: sai_ip_address_t + attr_value_field: ipaddr + default: 0.0.0.0 + isresourcetype: false + flags: CREATE_AND_SET + object_name: null + allow_null: false + valid_only: null + is_vlan: false + deprecated: false stats: - !!python/object:utils.sai_spec.sai_attribute.SaiAttribute name: SAI_ENI_STAT_RX_BYTES diff --git a/dash-pipeline/SAI/specs/dash_flow.yaml b/dash-pipeline/SAI/specs/dash_flow.yaml index 866fcbdb5..5cf17b09b 100644 --- a/dash-pipeline/SAI/specs/dash_flow.yaml +++ b/dash-pipeline/SAI/specs/dash_flow.yaml @@ -28,7 +28,7 @@ sai_apis: description: Action parameter DASH flow enabled key type: sai_dash_flow_enabled_key_t attr_value_field: s32 - default: SAI_DASH_FLOW_ENABLED_KEY_ENI_ADDR + default: SAI_DASH_FLOW_ENABLED_KEY_ENI_MAC isresourcetype: false flags: CREATE_AND_SET object_name: null diff --git a/dash-pipeline/SAI/specs/dash_inbound_routing.yaml b/dash-pipeline/SAI/specs/dash_inbound_routing.yaml index 9a3a6e783..3a18d08f8 100644 --- a/dash-pipeline/SAI/specs/dash_inbound_routing.yaml +++ b/dash-pipeline/SAI/specs/dash_inbound_routing.yaml @@ -141,26 +141,26 @@ sai_apis: p4_meta: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4Meta tables: - !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4MetaTable - id: 38920290 + id: 48545572 actions: SAI_INBOUND_ROUTING_ENTRY_ACTION_TUNNEL_DECAP: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4MetaAction name: SAI_INBOUND_ROUTING_ENTRY_ACTION_TUNNEL_DECAP - id: 22253429 + id: 20241846 attr_param_id: SAI_INBOUND_ROUTING_ENTRY_ATTR_METER_CLASS_OR: 1 SAI_INBOUND_ROUTING_ENTRY_ATTR_METER_CLASS_AND: 2 SAI_INBOUND_ROUTING_ENTRY_ACTION_TUNNEL_DECAP_PA_VALIDATE: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4MetaAction name: 
SAI_INBOUND_ROUTING_ENTRY_ACTION_TUNNEL_DECAP_PA_VALIDATE - id: 27987616 + id: 32575252 attr_param_id: SAI_INBOUND_ROUTING_ENTRY_ATTR_SRC_VNET_ID: 1 SAI_INBOUND_ROUTING_ENTRY_ATTR_METER_CLASS_OR: 2 SAI_INBOUND_ROUTING_ENTRY_ATTR_METER_CLASS_AND: 3 SAI_INBOUND_ROUTING_ENTRY_ACTION_VXLAN_DECAP: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4MetaAction name: SAI_INBOUND_ROUTING_ENTRY_ACTION_VXLAN_DECAP - id: 32581635 + id: 30272260 attr_param_id: {} SAI_INBOUND_ROUTING_ENTRY_ACTION_VXLAN_DECAP_PA_VALIDATE: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4MetaAction name: SAI_INBOUND_ROUTING_ENTRY_ACTION_VXLAN_DECAP_PA_VALIDATE - id: 22711915 + id: 24066060 attr_param_id: {} diff --git a/dash-pipeline/SAI/specs/dash_pa_validation.yaml b/dash-pipeline/SAI/specs/dash_pa_validation.yaml index c52119654..2c5728405 100644 --- a/dash-pipeline/SAI/specs/dash_pa_validation.yaml +++ b/dash-pipeline/SAI/specs/dash_pa_validation.yaml @@ -70,9 +70,9 @@ sai_apis: p4_meta: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4Meta tables: - !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4MetaTable - id: 35526612 + id: 41827129 actions: SAI_PA_VALIDATION_ENTRY_ACTION_PERMIT: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4MetaAction name: SAI_PA_VALIDATION_ENTRY_ACTION_PERMIT - id: 32591400 + id: 31497578 attr_param_id: {} diff --git a/dash-pipeline/SAI/specs/dash_vip.yaml b/dash-pipeline/SAI/specs/dash_vip.yaml index 590ea7191..d0701c1a0 100644 --- a/dash-pipeline/SAI/specs/dash_vip.yaml +++ b/dash-pipeline/SAI/specs/dash_vip.yaml @@ -64,9 +64,9 @@ sai_apis: p4_meta: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4Meta tables: - !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4MetaTable - id: 45245089 + id: 43450617 actions: SAI_VIP_ENTRY_ACTION_ACCEPT: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4MetaAction name: SAI_VIP_ENTRY_ACTION_ACCEPT - id: 26041632 + id: 22888270 attr_param_id: {} diff --git 
a/dash-pipeline/SAI/specs/sai_spec.yaml b/dash-pipeline/SAI/specs/sai_spec.yaml index ed86286e3..ed071a761 100644 --- a/dash-pipeline/SAI/specs/sai_spec.yaml +++ b/dash-pipeline/SAI/specs/sai_spec.yaml @@ -14,6 +14,7 @@ api_types: - SAI_API_DASH_VIP - SAI_API_DASH_TUNNEL - SAI_API_DASH_FLOW +- SAI_API_DASH_APPLIANCE object_types: - SAI_OBJECT_TYPE_DASH_ACL_GROUP - SAI_OBJECT_TYPE_DASH_ACL_RULE @@ -39,6 +40,7 @@ object_types: - SAI_OBJECT_TYPE_FLOW_ENTRY_BULK_GET_SESSION_FILTER - SAI_OBJECT_TYPE_FLOW_ENTRY_BULK_GET_SESSION - SAI_OBJECT_TYPE_METER_BUCKET_ENTRY +- SAI_OBJECT_TYPE_DASH_APPLIANCE object_entries: - !!python/object:utils.sai_spec.sai_struct_entry.SaiStructEntry name: direction_lookup_entry @@ -193,12 +195,20 @@ enums: name: NAT_PORT description: '' value: '16' + - !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember + name: TUNNEL + description: '' + value: '32' + - !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember + name: REVERSE_TUNNEL + description: '' + value: '64' - !!python/object:utils.sai_spec.sai_enum.SaiEnum name: sai_dash_flow_enabled_key_t description: '' members: - !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember - name: ENI_ADDR + name: ENI_MAC description: '' value: '1' - !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember @@ -238,7 +248,7 @@ enums: description: '' value: '1' - !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember - name: ENI_ADDR + name: ENI_MAC description: '' value: '2' - !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember @@ -489,3 +499,4 @@ api_groups: - !inc '/SAI/specs/dash_vip.yaml' - !inc '/SAI/specs/dash_tunnel.yaml' - !inc '/SAI/specs/dash_flow.yaml' +- !inc '/SAI/specs/dash_appliance.yaml' diff --git a/dash-pipeline/bmv2/dash_inbound.p4 b/dash-pipeline/bmv2/dash_inbound.p4 index 40148d3c6..14c39590c 100644 --- a/dash-pipeline/bmv2/dash_inbound.p4 +++ b/dash-pipeline/bmv2/dash_inbound.p4 @@ -6,6 +6,7 @@ #include "dash_acl.p4" #include 
"routing_actions/routing_actions.p4" #include "dash_conntrack.p4" +#include "stages/inbound_routing.p4" control inbound(inout headers_t hdr, inout metadata_t meta) @@ -28,12 +29,14 @@ control inbound(inout headers_t hdr, } #ifdef STATEFUL_P4 - ConntrackOut.apply(1); + ConntrackOut.apply(1); #endif /* STATEFUL_P4 */ #ifdef PNA_CONNTRACK ConntrackOut.apply(hdr, meta); #endif //PNA_CONNTRACK + inbound_routing_stage.apply(hdr, meta); + do_tunnel_encap(hdr, meta, meta.overlay_data.dmac, diff --git a/dash-pipeline/bmv2/dash_metadata.p4 b/dash-pipeline/bmv2/dash_metadata.p4 index 48f01561c..87fbfa597 100644 --- a/dash-pipeline/bmv2/dash_metadata.p4 +++ b/dash-pipeline/bmv2/dash_metadata.p4 @@ -11,7 +11,9 @@ enum bit<32> dash_routing_actions_t { NAT = (1 << 1), NAT46 = (1 << 2), NAT64 = (1 << 3), - NAT_PORT = (1 << 4) + NAT_PORT = (1 << 4), + TUNNEL = (1 << 5), + REVERSE_TUNNEL = (1 << 6) }; enum bit<16> dash_direction_t { @@ -40,6 +42,7 @@ enum bit<16> dash_pipeline_stage_t { // Inbound stages INBOUND_STAGE_START = 100, + INBOUND_ROUTING = 100, // INBOUND_STAGE_START // Outbound stages OUTBOUND_STAGE_START = 200, @@ -52,7 +55,7 @@ enum bit<16> dash_pipeline_stage_t { }; enum bit<16> dash_flow_enabled_key_t { - ENI_ADDR = (1 << 0), + ENI_MAC = (1 << 0), VNI = (1 << 1), PROTOCOL = (1 << 2), SRC_IP = (1 << 3), @@ -100,7 +103,7 @@ enum bit<16> dash_flow_entry_bulk_get_session_filter_key_t { INVAILD = 0, FLOW_TABLE_ID = 1, - ENI_ADDR = 2, + ENI_MAC = 2, IP_PROTOCOL = 3, SRC_IP_ADDR = 4, DST_IP_ADDR = 5, @@ -164,6 +167,7 @@ struct eni_data_t { bit<6> dscp; dash_tunnel_dscp_mode_t dscp_mode; outbound_routing_group_data_t outbound_routing_group_data; + IPv4Address vip; } struct meter_context_t { @@ -273,6 +277,7 @@ struct metadata_t { dash_direction_t direction; dash_eni_mac_type_t eni_mac_type; dash_eni_mac_override_type_t eni_mac_override_type; + encap_data_t rx_encap; EthernetAddress eni_addr; bit<16> vnet_id; bit<16> dst_vnet_id; @@ -317,9 +322,12 @@ struct metadata_t { 
encap_data_t encap_data; // tunnel_data is used by dash_tunnel_id encap_data_t tunnel_data; + bit<1> enable_reverse_tunnel_learning; + IPv4Address reverse_tunnel_sip; overlay_rewrite_data_t overlay_data; bit<16> dash_tunnel_id; bit<32> meter_class; + bit<8> local_region_id; } #endif /* _SIRIUS_METADATA_P4_ */ diff --git a/dash-pipeline/bmv2/dash_pipeline.p4 b/dash-pipeline/bmv2/dash_pipeline.p4 index 0113ec195..db45d4f41 100644 --- a/dash-pipeline/bmv2/dash_pipeline.p4 +++ b/dash-pipeline/bmv2/dash_pipeline.p4 @@ -10,6 +10,7 @@ #include "dash_inbound.p4" #include "dash_conntrack.p4" #include "stages/conntrack_lookup.p4" +#include "stages/pre_pipeline.p4" #include "stages/direction_lookup.p4" #include "stages/eni_lookup.p4" #include "stages/ha.p4" @@ -42,41 +43,6 @@ control dash_ingress( meta.dropped = true; } - action accept() { - } - - @SaiTable[name = "vip", api = "dash_vip"] - table vip { - key = { - hdr.u0_ipv4.dst_addr : exact @SaiVal[name = "VIP", type="sai_ip_address_t"]; - } - - actions = { - accept; - @defaultonly deny; - } - - const default_action = deny; - } - - action set_appliance(EthernetAddress neighbor_mac, - EthernetAddress mac) { - meta.encap_data.underlay_dmac = neighbor_mac; - meta.encap_data.underlay_smac = mac; - } - - /* This table API should be implemented manually using underlay SAI */ - @SaiTable[ignored = "true"] - table appliance { - key = { - meta.appliance_id : ternary; - } - - actions = { - set_appliance; - } - } - #define ACL_GROUPS_PARAM(prefix) \ @SaiVal[type="sai_object_id_t"] bit<16> ## prefix ##_stage1_dash_acl_group_id, \ @SaiVal[type="sai_object_id_t"] bit<16> ## prefix ##_stage2_dash_acl_group_id, \ @@ -114,6 +80,8 @@ control dash_ingress( bit<1> full_flow_resimulation_requested, bit<64> max_resimulated_flow_per_second, @SaiVal[type="sai_object_id_t"] bit<16> outbound_routing_group_id, + bit<1> enable_reverse_tunnel_learning, + @SaiVal[type="sai_ip_address_t"] IPv4Address reverse_tunnel_sip, bit<1> is_ha_flow_owner) { 
meta.eni_data.cps = cps; @@ -133,6 +101,11 @@ control dash_ingress( meta.encap_data.vni = vm_vni; meta.vnet_id = vnet_id; + meta.enable_reverse_tunnel_learning = enable_reverse_tunnel_learning; + meta.reverse_tunnel_sip = reverse_tunnel_sip; + + meta.routing_actions = meta.routing_actions | dash_routing_actions_t.REVERSE_TUNNEL; + if (meta.is_overlay_ip_v6 == 1) { if (meta.direction == dash_direction_t.OUTBOUND) { ACL_GROUPS_COPY_TO_META(outbound_v6); @@ -172,61 +145,6 @@ control dash_ingress( const default_action = deny; } - action permit() { - } - - action vxlan_decap() {} - action vxlan_decap_pa_validate() {} - - action tunnel_decap(inout headers_t hdr, - inout metadata_t meta, - bit<32> meter_class_or, - @SaiVal[default_value="4294967295"] bit<32> meter_class_and) { - set_meter_attrs(meta, meter_class_or, meter_class_and); - } - - action tunnel_decap_pa_validate(inout headers_t hdr, - inout metadata_t meta, - @SaiVal[type="sai_object_id_t"] bit<16> src_vnet_id, - bit<32> meter_class_or, - @SaiVal[default_value="4294967295"] bit<32> meter_class_and) { - meta.vnet_id = src_vnet_id; - set_meter_attrs(meta, meter_class_or, meter_class_and); - } - - @SaiTable[name = "pa_validation", api = "dash_pa_validation"] - table pa_validation { - key = { - meta.vnet_id: exact @SaiVal[type="sai_object_id_t"]; - hdr.u0_ipv4.src_addr : exact @SaiVal[name = "sip", type="sai_ip_address_t"]; - } - - actions = { - permit; - @defaultonly deny; - } - - const default_action = deny; - } - - @SaiTable[name = "inbound_routing", api = "dash_inbound_routing"] - table inbound_routing { - key = { - meta.eni_id: exact @SaiVal[type="sai_object_id_t"]; - hdr.u0_vxlan.vni : exact @SaiVal[name = "VNI"]; - hdr.u0_ipv4.src_addr : ternary @SaiVal[name = "sip", type="sai_ip_address_t"]; - } - actions = { - tunnel_decap(hdr, meta); - tunnel_decap_pa_validate(hdr, meta); - vxlan_decap; // Deprecated, but cannot be removed until SWSS is updated. 
- vxlan_decap_pa_validate; // Deprecated, but cannot be removed until SWSS is updated. - @defaultonly deny; - } - - const default_action = deny; - } - action set_acl_group_attrs(@SaiVal[type="sai_ip_addr_family_t", isresourcetype="true"] bit<32> ip_addr_family) { if (ip_addr_family == 0) /* SAI_IP_ADDR_FAMILY_IPV4 */ { if (meta.is_overlay_ip_v6 == 1) { @@ -265,70 +183,10 @@ control dash_ingress( #endif // DPDK_PNA_SEND_TO_PORT_FIX_MERGED #endif // TARGET_DPDK_PNA - if (meta.is_fast_path_icmp_flow_redirection_packet) { - UPDATE_COUNTER(port_lb_fast_path_icmp_in, 0); - } - - if (vip.apply().hit) { - /* Use the same VIP that was in packet's destination if it's - present in the VIP table */ - meta.encap_data.underlay_sip = hdr.u0_ipv4.dst_addr; - } else { - UPDATE_COUNTER(vip_miss_drop, 0); - - if (meta.is_fast_path_icmp_flow_redirection_packet) { - } - } - + pre_pipeline_stage.apply(hdr, meta); direction_lookup_stage.apply(hdr, meta); - - appliance.apply(); - - /* Outer header processing */ eni_lookup_stage.apply(hdr, meta); - // Save the original DSCP value - meta.eni_data.dscp_mode = dash_tunnel_dscp_mode_t.PRESERVE_MODEL; - meta.eni_data.dscp = (bit<6>)hdr.u0_ipv4.diffserv; - - if (meta.direction == dash_direction_t.INBOUND) { - switch (inbound_routing.apply().action_run) { - tunnel_decap_pa_validate: { - pa_validation.apply(); - } - deny: { - UPDATE_ENI_COUNTER(inbound_routing_entry_miss_drop); - } - } - } - - do_tunnel_decap(hdr, meta); - - /* At this point the processing is done on customer headers */ - - meta.is_overlay_ip_v6 = 0; - meta.ip_protocol = 0; - meta.dst_ip_addr = 0; - meta.src_ip_addr = 0; - if (hdr.customer_ipv6.isValid()) { - meta.ip_protocol = hdr.customer_ipv6.next_header; - meta.src_ip_addr = hdr.customer_ipv6.src_addr; - meta.dst_ip_addr = hdr.customer_ipv6.dst_addr; - meta.is_overlay_ip_v6 = 1; - } else if (hdr.customer_ipv4.isValid()) { - meta.ip_protocol = hdr.customer_ipv4.protocol; - meta.src_ip_addr = 
(bit<128>)hdr.customer_ipv4.src_addr; - meta.dst_ip_addr = (bit<128>)hdr.customer_ipv4.dst_addr; - } - - if (hdr.customer_tcp.isValid()) { - meta.src_l4_port = hdr.customer_tcp.src_port; - meta.dst_l4_port = hdr.customer_tcp.dst_port; - } else if (hdr.customer_udp.isValid()) { - meta.src_l4_port = hdr.customer_udp.src_port; - meta.dst_l4_port = hdr.customer_udp.dst_port; - } - if (!eni.apply().hit) { UPDATE_COUNTER(eni_miss_drop, 0); deny(); @@ -341,21 +199,28 @@ control dash_ingress( conntrack_lookup_stage.apply(hdr, meta); UPDATE_ENI_COUNTER(eni_rx); + + if (meta.direction == dash_direction_t.OUTBOUND) { + UPDATE_ENI_COUNTER(eni_outbound_rx); + } else if (meta.direction == dash_direction_t.INBOUND) { + UPDATE_ENI_COUNTER(eni_inbound_rx); + } + if (meta.is_fast_path_icmp_flow_redirection_packet) { UPDATE_ENI_COUNTER(eni_lb_fast_path_icmp_in); } + do_tunnel_decap(hdr, meta); + ha_stage.apply(hdr, meta); acl_group.apply(); if (meta.direction == dash_direction_t.OUTBOUND) { - UPDATE_ENI_COUNTER(eni_outbound_rx); - meta.target_stage = dash_pipeline_stage_t.OUTBOUND_ROUTING; outbound.apply(hdr, meta); } else if (meta.direction == dash_direction_t.INBOUND) { - UPDATE_ENI_COUNTER(eni_inbound_rx); + meta.target_stage = dash_pipeline_stage_t.INBOUND_ROUTING; inbound.apply(hdr, meta); } diff --git a/dash-pipeline/bmv2/stages/conntrack_lookup.p4 b/dash-pipeline/bmv2/stages/conntrack_lookup.p4 index 99e1c011c..2dbd0d41a 100644 --- a/dash-pipeline/bmv2/stages/conntrack_lookup.p4 +++ b/dash-pipeline/bmv2/stages/conntrack_lookup.p4 @@ -191,7 +191,7 @@ control conntrack_lookup_stage(inout headers_t hdr, inout metadata_t meta) { apply { flow_table.apply(); - if (meta.conntrack_data.flow_table.flow_enabled_key & dash_flow_enabled_key_t.ENI_ADDR != 0) { + if (meta.conntrack_data.flow_table.flow_enabled_key & dash_flow_enabled_key_t.ENI_MAC != 0) { meta.conntrack_data.flow_key.eni_mac = meta.eni_addr; } diff --git a/dash-pipeline/bmv2/stages/direction_lookup.p4 
b/dash-pipeline/bmv2/stages/direction_lookup.p4 index c86573a97..a75d35528 100644 --- a/dash-pipeline/bmv2/stages/direction_lookup.p4 +++ b/dash-pipeline/bmv2/stages/direction_lookup.p4 @@ -33,7 +33,7 @@ control direction_lookup_stage( @SaiTable[name = "direction_lookup", api = "dash_direction_lookup"] table direction_lookup { key = { - hdr.u0_vxlan.vni : exact @SaiVal[name = "VNI"]; + meta.rx_encap.vni : exact @SaiVal[name = "VNI"]; } actions = { diff --git a/dash-pipeline/bmv2/stages/inbound_routing.p4 b/dash-pipeline/bmv2/stages/inbound_routing.p4 new file mode 100644 index 000000000..ea56cef5a --- /dev/null +++ b/dash-pipeline/bmv2/stages/inbound_routing.p4 @@ -0,0 +1,79 @@ +#ifndef _DASH_STAGE_INBOUND_ROUTING_P4_ +#define _DASH_STAGE_INBOUND_ROUTING_P4_ + +#include "../dash_routing_types.p4" + +control inbound_routing_stage(inout headers_t hdr, + inout metadata_t meta) +{ + action permit() {} + + @SaiTable[name = "pa_validation", api = "dash_pa_validation"] + table pa_validation { + key = { + meta.vnet_id: exact @SaiVal[type="sai_object_id_t"]; + meta.rx_encap.underlay_sip : exact @SaiVal[name = "sip", type="sai_ip_address_t"]; + } + + actions = { + permit; + @defaultonly drop(meta); + } + + const default_action = drop(meta); + } + + action vxlan_decap() {} + action vxlan_decap_pa_validate() {} + + action tunnel_decap(inout headers_t hdr, + inout metadata_t meta, + bit<32> meter_class_or, + @SaiVal[default_value="4294967295"] bit<32> meter_class_and) { + set_meter_attrs(meta, meter_class_or, meter_class_and); + } + + action tunnel_decap_pa_validate(inout headers_t hdr, + inout metadata_t meta, + @SaiVal[type="sai_object_id_t"] bit<16> src_vnet_id, + bit<32> meter_class_or, + @SaiVal[default_value="4294967295"] bit<32> meter_class_and) { + meta.vnet_id = src_vnet_id; + set_meter_attrs(meta, meter_class_or, meter_class_and); + } + + @SaiTable[name = "inbound_routing", api = "dash_inbound_routing"] + table inbound_routing { + key = { + meta.eni_id: exact 
@SaiVal[type="sai_object_id_t"]; + meta.rx_encap.vni : exact @SaiVal[name = "VNI"]; + meta.rx_encap.underlay_sip : ternary @SaiVal[name = "sip", type="sai_ip_address_t"]; + } + actions = { + tunnel_decap(hdr, meta); + tunnel_decap_pa_validate(hdr, meta); + vxlan_decap; // Deprecated, but cannot be removed until SWSS is updated. + vxlan_decap_pa_validate; // Deprecated, but cannot be removed until SWSS is updated. + @defaultonly drop(meta); + } + + const default_action = drop(meta); + } + + apply { + if (meta.target_stage != dash_pipeline_stage_t.INBOUND_ROUTING) { + return; + } + + switch (inbound_routing.apply().action_run) { + tunnel_decap_pa_validate: { + pa_validation.apply(); + } + drop: { + UPDATE_ENI_COUNTER(inbound_routing_entry_miss_drop); + } + } + } +} + +#endif /* _DASH_STAGE_INBOUND_ROUTING_P4_ */ \ No newline at end of file diff --git a/dash-pipeline/bmv2/stages/pre_pipeline.p4 b/dash-pipeline/bmv2/stages/pre_pipeline.p4 new file mode 100644 index 000000000..8edfcd602 --- /dev/null +++ b/dash-pipeline/bmv2/stages/pre_pipeline.p4 @@ -0,0 +1,125 @@ +#ifndef _DASH_STAGE_PRE_PIPELINE_P4_ +#define _DASH_STAGE_PRE_PIPELINE_P4_ + +control pre_pipeline_stage(inout headers_t hdr, + inout metadata_t meta) +{ + action accept() {} + + action set_appliance(bit<8> local_region_id) { + meta.local_region_id = local_region_id; + } + + @SaiTable[name = "dash_appliance", api = "dash_appliance", order = 0, isobject="true"] + table appliance { + key = { + meta.appliance_id : exact @SaiVal[type="sai_object_id_t"]; + } + + actions = { + set_appliance; + @defaultonly accept; + } + const default_action = accept; + } + + action set_underlay_mac(EthernetAddress neighbor_mac, + EthernetAddress mac) { + meta.encap_data.underlay_dmac = neighbor_mac; + meta.encap_data.underlay_smac = mac; + } + + /* This table API should be implemented manually using underlay SAI */ + @SaiTable[ignored = "true"] + table underlay_mac { + key = { + meta.appliance_id : ternary; + } + + actions = { + 
set_underlay_mac; + } + } + + @SaiTable[name = "vip", api = "dash_vip"] + table vip { + key = { + meta.rx_encap.underlay_dip : exact @SaiVal[name = "VIP", type="sai_ip_address_t"]; + } + + actions = { + accept; + @defaultonly drop(meta); + } + + const default_action = drop(meta); + } + + apply { + // Normalize the outer headers. + // This helps us handling multiple encaps and different type of encaps in the future and simplify the later packet processing. + meta.rx_encap.underlay_smac = hdr.u0_ethernet.src_addr; + meta.rx_encap.underlay_dmac = hdr.u0_ethernet.dst_addr; + + if (hdr.u0_ipv4.isValid()) { + meta.rx_encap.underlay_sip = hdr.u0_ipv4.src_addr; + meta.rx_encap.underlay_dip = hdr.u0_ipv4.dst_addr; + } + // IPv6 encap on received packet is not supported yet. + // else if ((hdr.u0_ipv6.isValid()) { + // meta.rx_encap.underlay_sip = hdr.u0_ipv6.src_addr; + // meta.rx_encap.underlay_dip = hdr.u0_ipv6.dst_addr; + // } + + meta.rx_encap.dash_encapsulation = dash_encapsulation_t.VXLAN; + meta.rx_encap.vni = hdr.u0_vxlan.vni; + + // Save the original DSCP value + meta.eni_data.dscp_mode = dash_tunnel_dscp_mode_t.PRESERVE_MODEL; + meta.eni_data.dscp = (bit<6>)hdr.u0_ipv4.diffserv; + + // Normalize the customer headers for later lookups. 
+ meta.is_overlay_ip_v6 = 0; + meta.ip_protocol = 0; + meta.dst_ip_addr = 0; + meta.src_ip_addr = 0; + if (hdr.customer_ipv6.isValid()) { + meta.ip_protocol = hdr.customer_ipv6.next_header; + meta.src_ip_addr = hdr.customer_ipv6.src_addr; + meta.dst_ip_addr = hdr.customer_ipv6.dst_addr; + meta.is_overlay_ip_v6 = 1; + } else if (hdr.customer_ipv4.isValid()) { + meta.ip_protocol = hdr.customer_ipv4.protocol; + meta.src_ip_addr = (bit<128>)hdr.customer_ipv4.src_addr; + meta.dst_ip_addr = (bit<128>)hdr.customer_ipv4.dst_addr; + } + + if (hdr.customer_tcp.isValid()) { + meta.src_l4_port = hdr.customer_tcp.src_port; + meta.dst_l4_port = hdr.customer_tcp.dst_port; + } else if (hdr.customer_udp.isValid()) { + meta.src_l4_port = hdr.customer_udp.src_port; + meta.dst_l4_port = hdr.customer_udp.dst_port; + } + + // The pipeline starts from here and we can use the normalized headers for processing. + if (meta.is_fast_path_icmp_flow_redirection_packet) { + UPDATE_COUNTER(port_lb_fast_path_icmp_in, 0); + } + + if (vip.apply().hit) { + /* Use the same VIP that was in packet's destination if it's present in the VIP table */ + meta.encap_data.underlay_sip = meta.rx_encap.underlay_dip; + } else { + UPDATE_COUNTER(vip_miss_drop, 0); + + if (meta.is_fast_path_icmp_flow_redirection_packet) { + } + } + + appliance.apply(); + underlay_mac.apply(); + } +} + +#endif // _DASH_STAGE_PRE_PIPELINE_P4_ diff --git a/dash-pipeline/disable_veth_ipv6.sh b/dash-pipeline/disable_veth_ipv6.sh index 64bff490d..6d88b1349 100755 --- a/dash-pipeline/disable_veth_ipv6.sh +++ b/dash-pipeline/disable_veth_ipv6.sh @@ -20,3 +20,17 @@ sudo sysctl net.ipv6.conf.veth3.autoconf=0 sudo sysctl net.ipv6.conf.veth3.accept_ra=0 sudo sysctl net.ipv6.conf.veth3.accept_ra_pinfo=0 sudo sysctl net.ipv6.conf.veth3.router_solicitations=0 + +if [ $DPAPP_LINK ]; then +sudo sysctl net.ipv6.conf.$DPAPP_LINK.autoconf=0 +sudo sysctl net.ipv6.conf.$DPAPP_LINK.accept_ra=0 +sudo sysctl net.ipv6.conf.$DPAPP_LINK.accept_ra_pinfo=0 +sudo 
sysctl net.ipv6.conf.$DPAPP_LINK.router_solicitations=0 +fi +if [ $DPAPP_LINK_PEER ]; then +sudo sysctl net.ipv6.conf.$DPAPP_LINK_PEER.disable_ipv6=1 +sudo sysctl net.ipv6.conf.$DPAPP_LINK_PEER.autoconf=0 +sudo sysctl net.ipv6.conf.$DPAPP_LINK_PEER.accept_ra=0 +sudo sysctl net.ipv6.conf.$DPAPP_LINK_PEER.accept_ra_pinfo=0 +sudo sysctl net.ipv6.conf.$DPAPP_LINK_PEER.router_solicitations=0 +fi diff --git a/dash-pipeline/dockerfiles/DOCKER_DPAPP_IMG.env b/dash-pipeline/dockerfiles/DOCKER_DPAPP_IMG.env new file mode 100644 index 000000000..a36e5dac8 --- /dev/null +++ b/dash-pipeline/dockerfiles/DOCKER_DPAPP_IMG.env @@ -0,0 +1,5 @@ +# Define docker image repo/name:tag +# Changing this will cause build/publish to occur in CI actions +export DASH_ACR_REGISTRY=sonicdash.azurecr.io +export DOCKER_DPAPP_IMG_NAME?=${DASH_ACR_REGISTRY}/dash-dpapp-bldr +export DOCKER_DPAPP_IMG_CTAG?=240827 diff --git a/dash-pipeline/dockerfiles/Dockerfile.dpapp b/dash-pipeline/dockerfiles/Dockerfile.dpapp new file mode 100644 index 000000000..0b90e0485 --- /dev/null +++ b/dash-pipeline/dockerfiles/Dockerfile.dpapp @@ -0,0 +1,86 @@ +# This Dockerfile builds an image used to compile/run dash date-plane app. 
+FROM sonicdash.azurecr.io/dash-grpc:1.43.2 as grpc +FROM p4lang/behavioral-model@sha256:ce45720e28a96a50f275c1b511cd84c2558b62f2cf7a7e506765183bc3fb2e32 +LABEL maintainer="SONIC-DASH Community" +LABEL description="DASH date-plane app using vpp" + +# Configure make to run as many parallel jobs as cores available +ARG available_processors +ARG MAKEFLAGS=-j$available_processors + +ARG sswitch_grpc=yes +ARG CC=gcc +ARG CXX=g++ +# Set TZ to avoid interactive installer +ENV TZ=America/Los_Angeles +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +ENV GIT_SSL_NO_VERIFY=true + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + build-essential \ + autoconf \ + libtool \ + libtool-bin \ + pkg-config \ + patchelf \ + sudo \ + iproute2 net-tools iputils-ping \ + make + +## Install vpp +RUN apt-get install -y curl +RUN curl -s https://packagecloud.io/install/repositories/fdio/release/script.deb.sh | bash -x +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + vpp vpp-plugin-core vpp-plugin-dpdk vpp-dbg vpp-dev + +# vpp development environment +RUN echo "wireshark-common wireshark-common/install-setuid boolean true" | debconf-set-selections +WORKDIR /var +RUN (git clone https://gerrit.fd.io/r/vpp && \ + cd vpp && UNATTENDED=y make install-dep) + + +WORKDIR /usr/local/lib/ + +# Copy libabsl .a files from p4pi, make shared lib for our use and delete .a's +COPY --from=grpc /usr/local/lib/lib*grpc*.so* \ + /usr/local/lib/libabsl*.so* \ + /usr/local/lib/libgpr*.so* \ + /usr/local/lib/libupb*.so* \ + /usr/local/lib/libre2*.so* \ + /usr/local/lib/libaddress_sorting*.so* \ + /usr/local/lib/libssl*.so* \ + /usr/local/lib/libcrypto*.so* \ + ./ + +# Specify dash sai libs dependency on vpp +RUN patchelf --set-rpath /SAI/lib /usr/bin/vpp +RUN patchelf --add-needed libsai.so \ + --add-needed libprotobuf.so \ + --add-needed libpiprotobuf.so \ + --add-needed libpiprotogrpc.so \ + --add-needed libgrpc++.so.1.43 \ 
+ /usr/bin/vpp +RUN ldconfig + +WORKDIR / + +ARG user +ARG uid +ARG group +ARG guid +ARG hostname + +ENV BUILD_HOSTNAME $hostname +ENV USER $user + +RUN groupadd -f -r -g $guid $group +RUN useradd $user -l -u $uid -g $guid -d /var/$user -m -s /bin/bash +RUN echo "$user ALL=(ALL) NOPASSWD:ALL" >>/etc/sudoers + +USER $user + +CMD /bin/bash diff --git a/documentation/dataplane/dash-bmv2-data-plane-app.md b/documentation/dataplane/dash-bmv2-data-plane-app.md new file mode 100644 index 000000000..33c0ff7ca --- /dev/null +++ b/documentation/dataplane/dash-bmv2-data-plane-app.md @@ -0,0 +1,383 @@ +# DASH BMv2 Data Plane App HLD + +| Rev | Date | Author | Change Description | +| --- | ---- | ------ | ------------------ | +| 0.1 | 06/29/2024 | Junhua Zhai | Initial version | + +1. [Terminology](#1-terminology) +2. [Background](#2-background) +3. [Project scenario](#3-project-scenario) +4. [Resource modeling, requirement, and SLA](#4-resource-modeling-requirement-and-sla) +5. [System Architecture overview](#5-system-architecture-overview) +6. [Detailed design](#6-detailed-design) + - [6.1. DASH metadata](#61-dash-metadata) + - [6.2. Basic Flow](#62-basic-flow) + - [6.3. Flow resimulation](#63-flow-resimulation) + - [6.4. HA flow](#64-ha-flow) + - [6.5. HA flow resimulation](#65-ha-flow-resimulation) + - [6.6. SAI](#66-sai) +7. [Test Plan](#7-test-plan) +8. [Appendix](#8-appendix) + + +## 1. Terminology + +| Term | Explanation | +| --- | --- | +| VPP | Vector Packet Processing | +| DPAPP | Data plane Application | + + +## 2. Background + +Referring to data path logical architecture, shown at the below figure: + +![dash_dpapp_overview](images/dash-bmv2-data-plane-app-overview.drawio.svg) + +Data plane app is another packet processing engine running on CPUs. 
It adds extra capacities onto DASH capable ASIC: +- It serves the slow (exception) path of packet processing to implement complicated pipeline logic, which is hard to be done in ASIC, for example adding/updating/removing table entry inline. +- It optionally implements some DASH-SAI APIs, which are not proper/easy to be done upon ASIC SDK. + +### 2.1. Scope + +This document only focuses on describing the design of a data-plane app example, as a proof of concept, how it cooperates with [DASH pipeline BMv2](https://github.com/sonic-net/DASH/tree/main/dash-pipeline/bmv2) to implement DASH data plane. The app will be based on [VPP](https://fd.io/). + +## 3. Project scenario + +### 3.1. Stateful packet process - flow +- Flow Creation + + In DASH pipeline, after flow keys are well extracted, packet goes to flow + lookup stage. It does the flow lookup. If any flow is matched, packet is + marked a flow-hit flag, otherwise flow-miss flag. If flow-hit, the packet + should bypass policy matching stages, such as ACL, routing, etc, and + continue doing the rest stages, like flow actions. If flow-miss, the packet + should do policy matching stages and then be forwarded to data-plane app. + The data-plane app will use dash-sai APIs to create flow in flow table, and + then re-inject the packet back to pipeline. + +- Flow Deletion + + In flow lookup stage, TCP FIN/RST packet should be treated specially with + FLOW-DELETE hint and forwarded to data-plane app. +- Flow Age-out + + Data-plane app can help the data plane implement the flow age-out + mechanism by bridging the gap in the current data plane engine - bmv2. + For example, in flow lookup stage, if packet hits one flow, pipeline may refresh + flow timestamp via p4 extern function. Data-plane app periodically scans flow table + and check if flow is timed out according to (current timestamp - flow timestamp) vs idle + timeout value. + +### 3.2. 
HA +- Inline flow replication + + In HA context, Active data-plane app creates flow, serializes the flow in +metadata header, glues it with original packet, and sends the packet to Standby +via DPU data-plane channel. Standby data-plane app recreates +the flow, and acknowledges Active data-plane app to finish flow creation. The +same logic can apply for flow deletion, flow age-out. +- Flow bulk sync + + Flow bulk sync replicates batch flows from one DPU to another to make flow + table consistent on Active and Standby DPUs. When HA agents starts a bulk + sync via DASH SAI, Active data-plane app will walk flow table based on sync + method (perfect/range), generate batch flows and send them to Standby + data-plane app with gRPC via control-plane channel. Standby date-plane app + will create flows in order. + +### 3.3. Flow re-simulation +When SONiC changes polices via DASH SAI, some flows might need to be updated +to get the latest policy applied. Data-plane app is raised to re-simulate +flow. In HA context, Active data-plane app also needs to sync the updated +flows to Standby. + +## 4. Resource modeling, requirement, and SLA +Refer to [SONiC DASH HLD](https://github.com/sonic-net/DASH/blob/main/documentation/general/dash-sonic-hld.md) + +## 5. System Architecture overview + +![dash_dpapp_arch](images/dash-bmv2-data-plane-app-arch.drawio.svg) + +Referring to the above figure, data-plane app overall is a multi-thread application based on vpp, running in a standalone container. It includes these components: + +- master thread, it runs dashsai server to receive dashsai requests (dash object CRUD) via northbound RPC channel and then invoke DASH SAI APIs to handle them. The server also processes flow creation/deletion notification from workers. +- worker threads, they serve as an exception (slow) path of packet processing, running +on multiple CPUs. It creates a flow in local flow table and notifies dashsai +server to offload it to BMv2 flow table. 
The packet is temporarily queued. +After workers know the success of flow offloading to BMv2, they dequeue the packet and send it back to P4 pipeline via DPAPP port. The workers also do flow age-out task with proper scheduling. +- flow table, is a local cache of BMv2 flow table. +- DASH SAI, is a unique interface for DASH object CRUD of DASH pipeline, implemented by DASH BMv2. +- DPAPP port, is a veth interface and connects to BMv2 via veth pair. It serves as datapath channel to receive/send all packets between date-plane app and BMv2. Generally the port supports multi RSS queues, each queue binds to one worker thread. + +**Note:** For simplicity and concept verification, worker threads may directly call DASH SAI to offload flow to BMv2. The concern is that DASH SAI blocking API can block packet processing of workers. + +## 6. Detailed design + +Referring to the below figure from [HA API HLD](https://github.com/sonic-net/DASH/blob/main/documentation/high-avail/ha-api-hld.md), it greatly outlines the whole packet flow in data plane for both standalone and HA context. + +![packet flow in data plane](https://github.com/sonic-net/DASH/blob/main/documentation/high-avail/images/ha-bm-packet-flow.svg) + +From the perspective of DPAPP, its core task in slow path is to create flow in flow table in case of flow miss in Flow (Conntrack) lookup stage, and then maintain flow state. The sub sections will depict these functions around flow object. + + +### 6.1. DASH metadata +DASH metadata records the packet processing result of DASH pipeline. 
It can have the following info to help flow creation/removal, etc operations: +- flow key entry - eni_mac, vnet_id, 5-tuples +- common flow data - state, direction, eni, flow actions, metering class, policy ID +- flow overlay rewrite data +- flow underlay encapsulation data +- flow tunnel encapsulation data + +When DASH pipeline requests DPAPP for flow creation, it encapsulates DASH metadata in an ethernet frame with EtherType DASH_METADATA and appends the original customer packet. The packet sent to DPAPP is like: + +``` + Ethernet HEADER | DASH metadata | customer packet +``` + +The number of DASH_METADATA is 0x876D, which reuses the number of EtherType SECURE_DATA (vpp/src/vnet/ethernet/types.def at master · FDio/vpp · GitHub). +DASH metadata is encoded in protocol DASH_METADATA, whose message format is defined as the below figure: + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source | Type |Subtype| Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Flow key entry | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Flow data : + : | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Source: Where is packet from, dash_packet_source_t + Type: Packet type under certain source + Subtype: Packet subtype under certain type + Length: Total length of DASH header + Flow key entry: eni_mac, 5-tuples + Flow data: As next figure +``` + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Reserved |U| Sync State | Direction | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Version | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Action | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Meter Class | + 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | [overlay rewrite] : + : | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | [tunnel encap] : + : | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | [underlay encap] : + : | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + U: flag is-unidirectional, 0, false, 1, true + Sync State: flow sync state + Direction: flow direction, 0, oubound; 1, inbound + Version: flow version + Action: flow action + Meter Class: Meter class object + ... +``` + +### 6.2. Basic Flow + +Basic flow means basic operation of flow, i.e. CRUD. Generally DPAPP may run a flow state machine to manage flow state and do proper operation of CRUD. The state transformation is triggered by these events: +- Packets with DASH metadata from pipeline, for flow creation/deletion/refresh +- Timer, for flow age-out + +The below sequence chart shows the detail steps of flow creation, flow state becomes FLOW_CREATED. Other operations are similar. + +```mermaid +sequenceDiagram + participant P as P4 Pipeline + participant R as P4 Runtime + participant W as DPAPP worker + participant M as DPAPP master + autonumber + P->>+W: DASH metadata (source PIPELINE) + customer packet + W->>+W: Create flow in local flow table + W->>+M: Request current flow offload + M->>+R: Call DASH SAI create_flow (gRPC) + R->>R: Write flow to P4 flow table + R->>M: create_flow returns OK + M->>W: Reply flow offload done + W->>P: DASH metadata (source DPAPP) + customer packet +``` + +It is remarkable that DPAPP worker should not call DASH SAI API directly, otherwise DASH SAI may block DPAPP worker to handle other packets. Each flow has a packet queue. In step 3, it enqueues current packet firstly and then requests flow offload. In step 8, it dequeues the packet and then sends the packet back to P4 pipeline. + +### 6.3. 
Flow resimulation + +```mermaid +sequenceDiagram + participant C as DASH SAI Client + participant P as P4 Pipeline + participant R as P4 Runtime + participant W as DPAPP worker + participant S as DASH SAI Server + autonumber + C->>+S: ENI_ATTR_FULL_FLOW_RESIMULATION_REQUESTED + S->>+R: Call DASH SAI set_eni_attr (epoch) + R->>R: Update eni epoch + R->>-S: set_eni_attr returns OK + S->>-C: FULL_FLOW_RESIMULATION_REQUESTED return OK + P->>+P: packet comes and starts flow resimulation due to eni.epoch > flow.epoch + P->>+W: slow-path, update flow data +``` +Note: epoch could be an internal attribute of eni and flow, which is not +visible in public SAI. + +### 6.4. HA flow +Base on basic flow, HA flow adds an extra FLOW_SYNCED state, which involves +extra sync for request/response ping-pong between DPAPP and PEER DPAPP. + +### 6.5. HA flow resimulation +Same as section 6.3, but also do the below: +- Sync eni epoch +- Update flow data in PEER + +### 6.6. SAI +DPAPP will implement a dashsai server, which is a shim layer of underlying DASH SAI. It works as below: +1. Accept the request from dashsai client +2. Parse the request, identify DASH SAI object and construct its attr list +3. If the SAI object is implemented in DPAPP itself, call its function +4. Else invoke DASH SAI API of the object +5. Reply dashsai client + +## 7. Test Plan +Test objective: +1. Verify flow CRUD in standalone and HA +2. Verify DASH SAI + +Test scope: + + The test only covers the functionality verification of DPAPP. The test of performance and capacity is out of scope. + +Test environment: +- BMv2 P4 pipeline + DPAPP +- scapy + +The below table lists all test cases, all of which must be scripted based on DASH test infrastructure. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Test Suite

Test Case

Test Description

1. Unit Test

1.1 DASH SAI

+

Target: Verify that the dashsai server behaves correctly as a DASH SAI proxy

+

Steps:

+
    +
  1. Start P4 BMv2 and DPAPP
  2. +
  3. dashsai client sends requests for DASH SAI object CRUD
  4. +
  5. Verify dashsai server handles them correctly
  6. +
  7. Verify dashsai server calls proper DASH SAI APIs
  8. +
  9. Verify dashsai server replies dashsai client
  10. +
+

1.2 Flow Creation

+

Target: Verify flow creation OK

+

Steps:

+
    +
  1. Start P4 BMv2 and DPAPP
  2. +
  3. Configure vnet2vnet via DASH SAI
  4. +
  5. Send a TCP SYN packet to the BMv2 external port
  6. +
  7. Verify flow via vppctl “show dash flow”
  8. +
  9. Verify flow via DASH SAI flow get
  10. +
+

1.3 Flow Deletion

+

Target: Verify flow deletion OK

+

Steps:

+
    +
  1. Same test steps as test case 1.2
  2. +
  3. Send a TCP FIN packet to the BMv2 external port
  4. +
  5. Verify flow not found via vppctl “show dash flow”
  6. +
  7. Verify flow not found via DASH SAI flow get
  8. +
+

1.4 Flow Age-out

+

Target: Verify flow age-out OK

+

Steps:

+
    +
  1. Same test steps as test case 1.2
  2. +
  3. Change flow timeout to 10s
  4. +
  5. Wait 10s
  6. +
  7. Verify flow not found via DASH SAI flow get
  8. +
+

1.5 Flow resimulation

Target: Verify flow resimulation

+

Steps:

+
    +
  1. Same test steps as test case 1.2
  2. +
  3. Update outbound CA->PA mapping via SAI
  4. +
  5. Verify underlay DIP is changed in CA entry of table outbound_ca_to_pa
  6. +
  7. Get flow attr UNDERLAY_DIP via SAI flow get
  8. +
  9. Verify the above attr value unchanged
  10. +
  11. Send TCP DATA packet
  12. +
  13. Get flow attr UNDERLAY_DIP again via SAI flow get
  14. +
  15. Verify the attr value changed to new PA
  16. +
  17. Trigger multiple flows of one eni
  18. +
  19. Set eni attr FULL_RESIMULATION_REQ via SAI
  20. +
+

1.6 Test cases 1.2-1.5 in HA

Target: Verify flow CRUD in HA

1.7 Bulk sync in HA

Target: Verify bulk sync in HA

2. Negative

+

2.1 Flow not created

+
+

Target: Verify that no flow is created

+

Steps:

+
    +
  1. Send an illegal TCP packet (only ACK flag) as the first packet to the external port
  2. +
  3. P4 pipeline drops it, or lets DPAPP decide whether to drop it (open question, TBD)
  4. +
+
+ +## 8. Appendix +- [DASH FLOW API](https://github.com/sonic-net/DASH/blob/main/documentation/dataplane/dash-flow-api.md) +- [SONiC DASH HLD](https://github.com/sonic-net/DASH/blob/main/documentation/general/dash-sonic-hld.md) +- [DASH scaling requirements](https://github.com/sonic-net/SONiC/blob/master/doc/dash/dash-sonic-hld.md#14-scaling-requirements) +- [Smart Switch HA HLD](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md) +- [HA API HLD](https://github.com/sonic-net/DASH/blob/main/documentation/high-avail/ha-api-hld.md) +- [Flow Resimulation](https://github.com/sonic-net/DASH/blob/main/documentation/dataplane/dash-flow-resimulation.md) +- [DASH-SAI pipeline packet flow](https://github.com/sonic-net/DASH/blob/main/documentation/general/dash-sai-pipeline-packet-flow.md) + diff --git a/documentation/dataplane/dash-flow-api.md b/documentation/dataplane/dash-flow-api.md index d699b3091..5710bce10 100644 --- a/documentation/dataplane/dash-flow-api.md +++ b/documentation/dataplane/dash-flow-api.md @@ -97,7 +97,7 @@ typedef enum _sai_dash_flow_enabled_key_t { SAI_DASH_FLOW_ENABLED_KEY_NONE = 0, - SAI_DASH_FLOW_ENABLED_KEY_ENI_ADDR = 1 << 1, + SAI_DASH_FLOW_ENABLED_KEY_ENI_MAC = 1 << 1, SAI_DASH_FLOW_ENABLED_KEY_VNI = 1 << 2, @@ -517,7 +517,7 @@ uint32_t attr_count = 3; sai_attribute_t attr_list[3]; attr_list[0].id = SAI_FLOW_TABLE_ATTR_DASH_FLOW_ENABLED_KEY; attr_list[0].value = SAI_DASH_FLOW_ENABLED_KEY_PROTOCOL | - SAI_DASH_FLOW_ENABLED_KEY_ENI_ADDR | + SAI_DASH_FLOW_ENABLED_KEY_ENI_MAC | SAI_DASH_FLOW_ENABLED_KEY_VNI | SAI_DASH_FLOW_ENABLED_KEY_SRC_IP | SAI_DASH_FLOW_ENABLED_KEY_DST_IP | diff --git a/documentation/dataplane/images/dash-bmv2-data-plane-app-arch.drawio.svg b/documentation/dataplane/images/dash-bmv2-data-plane-app-arch.drawio.svg new file mode 100644 index 000000000..c3c9e933e --- /dev/null +++ b/documentation/dataplane/images/dash-bmv2-data-plane-app-arch.drawio.svg @@ -0,0 +1,465 @@ + + + + + 
+ + + + +
+
+
+ DASH BMv2 +
+
+
+
+ + DASH BMv2 + +
+
+ + + + +
+
+
+ P4 tables +
+
+
+
+ + P4 tables + +
+
+ + + + +
+
+
+ P4 pipeline +
+
+
+
+ + P4 pipeline + +
+
+ + + + +
+
+
+ P4 Runtime +
+
+
+
+ + P4 Runtime + +
+
+ + + + +
+
+
+ DPU-Port1 +
+
+
+
+ + DPU-Port1 + +
+
+ + + + +
+
+
+ DPU-PortN +
+
+
+
+ + DPU-PortN + +
+
+ + + + +
+
+
+ ...... +
+
+
+
+ + ...... + +
+
+ + + + +
+
+
+ CPU Port +
+
+
+
+ + CPU Port + +
+
+ + + + + + + +
+
+
+ RD +
+
+
+
+ + RD + +
+
+ + + +
+
+
+
+
+
+
+
+
+
+
+
+
+ + +
+
+ + + + + + +
+
+
+ Dataplane App +
+
+
+
+ + Dataplane App + +
+
+ + + + +
+
+
+ DPAPP port +
+
+
+
+ + DPAPP port + +
+
+ + + + + +
+
+
+ + worker threads + +
+
+
+
+ + worker threads + +
+
+ + + + + + + + + + +
+
+
+ Flow table +
+
+
+
+ + Flow table + +
+
+ + + + + +
+
+
+ RD +
+
+
+
+ + RD + +
+
+ + + + +
+
+
+ DASH SAI +
+
+
+
+ + DASH SAI + +
+
+ + + + + + +
+
+
+ + master thread + +
+
+
+
+ + master thread + +
+
+ + + + +
+
+
+ dashsai server +
+
+
+
+ + dashsai serv... + +
+
+ + + + +
+
+
+ GRPC +
+
+
+
+ + GRPC + +
+
+ + + + +
+
+
+ GRPC +
+
+
+
+ + GRPC + +
+
+ + + +
+
+
+ veth pair +
+
+
+
+ + veth pair + +
+
+ + + + +
+
+
+ RPC +
+
+
+
+ + RPC + +
+
+ + + + + + +
+
+
+ RW +
+
+
+
+ + RW + +
+
+ + + + +
+
+
+ + sairedis + +
+
+
+
+ + sairedis + +
+
+ + + + +
+
+
+ dashsai client +
+
+
+
+ + dashsai client + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/documentation/dataplane/images/dash-bmv2-data-plane-app-overview.drawio.svg b/documentation/dataplane/images/dash-bmv2-data-plane-app-overview.drawio.svg new file mode 100644 index 000000000..bfa2dbf63 --- /dev/null +++ b/documentation/dataplane/images/dash-bmv2-data-plane-app-overview.drawio.svg @@ -0,0 +1,4 @@ + + + +
Hardware
DASH capable ASICs
Kernel Space
ASIC Driver
User Space
Technology Provider ASIC SDK
DASH-SAI APIs
Technology Provider Data Plane App
\ No newline at end of file diff --git a/documentation/express-route-service/express-route-gateway-bypass.md b/documentation/express-route-service/express-route-gateway-bypass.md index ebb8c7ae9..d82acedd6 100644 --- a/documentation/express-route-service/express-route-gateway-bypass.md +++ b/documentation/express-route-service/express-route-gateway-bypass.md @@ -3,6 +3,7 @@ | Rev | Date | Author | Change Description | | --- | ---- | ------ | ------------------ | | 0.1 | 07/09/2024 | Riff Jiang | Initial version | +| 0.2 | 08/30/2024 | Riff Jiang | Simplify the MSEE failover handling | 1. [1. Terminology](#1-terminology) 2. [2. Background](#2-background) @@ -18,10 +19,6 @@ 2. [5.2. MSEE selection and failover handling](#52-msee-selection-and-failover-handling) 1. [5.2.1. Background](#521-background) 2. [5.2.2. MSEE device selection](#522-msee-device-selection) - 1. [5.2.2.1. Reverse routing stage](#5221-reverse-routing-stage) - 1. [5.2.2.1.1. Reverse routing group](#52211-reverse-routing-group) - 2. [5.2.2.1.2. Reverse routing group entry](#52212-reverse-routing-group-entry) - 2. [5.2.2.2. Reverse tunnel table and entry](#5222-reverse-tunnel-table-and-entry) 3. [5.2.3. MSEE failover handling using flow resimulation](#523-msee-failover-handling-using-flow-resimulation) 1. [5.2.3.1. Reverse tunnel updates](#5231-reverse-tunnel-updates) 2. [5.2.3.2. Maintaining per connection consistency (PCC)](#5232-maintaining-per-connection-consistency-pcc) @@ -138,92 +135,32 @@ typedef enum _sai_direction_lookup_entry_attr_t { #### 5.2.1. Background -In the ER gateway bypass scenario, the return traffic needs to be routed back to MSEE per customer's configuration, which is a MSEE device list per subnet. +In the ER gateway bypass scenario, return traffic needs to be routed back to MSEE where the traffic is originated. -Since the flow creation happens at the MSEE-to-PLS direction, as this is where the first packet lands on DASH. 
This configuration is essentially a LPM table lookup using the source IP to find the reverse tunnel to use. The tunnel provides a list of next hops as an ECMP group. And DASH needs to select a valid next hop to use and save it into the reverse flow to avoid further lookup. - -Furthermore, when MSEE failover, we need to update the reverse tunnel on all existing flows to point to the working ones. +Furthermore, when MSEE failover, we need to update the reverse tunnel on all existing flows to point to the new MSEE where the traffic will be coming from after failover. #### 5.2.2. MSEE device selection -To handle this, we are leveraging similar concepts as the RPF (Reverse Path Forwarding) in network devices, which checks the source IP during the forwarding path. - -##### 5.2.2.1. Reverse routing stage - -To support this behavior, we add the reverse routing stage in DASH. - -Unlike the regular routing stage, the reverse routing stage will not be specified in the routing types and will be default to be executed before the action apply stage. +Unlike VM-to-* scenarios, the reverse tunnel can only have 1 single destination IP that specified using the `SAI_ENI_ATTR_VM_UNDERLAY_DIP` attribute. MSEE devices can have active-active pairs, so this single IP solution won't work. -If no route entries are being hit in this stage, the packet should not be dropped but continue to later stages. This behavior is the same as having a default route with allow action. +To handle this, we learn the PLS-to-MSEE tunnel information from the first packet when flow is created, including the encap type, key, and destination IP, and create the reverse tunnel in the reverse flow, which make sure the return traffic will be sent back to the originating MSEE. To avoid changing the behavior of VM-to-* scenarios, this behavior can be turned on or off by the SDN controller for each ENI using a dedicated attribute (see below). -###### 5.2.2.1.1. 
Reverse routing group - -The reverse routing group is used for defining the reverse routing table. Once created, we can bind its object id to ENI to make it taking effect: +In some cases, we will need the ability to specify the source IP of the reverse tunnel. To support this, we added another new attribute in the ENI object. | SAI attribute name | Type | Description | | --------------- | ---- | ----------- | -| SAI_OUTBOUND_REVERSE_ROUTING_GROUP_ATTR_DISABLED | bool | If true, this entries in this routing group will not take effect, but won't drop the packets. | - -To specify which reverse group should be used on an ENI, we add the following attribute on ENI: - -| SAI attribute name | Type | Description | -| --------------- | ---- | ----------- | -| SAI_ENI_ATTR_OUTBOUND_REVERSE_ROUTING_GROUP_ID | sai_object_id_t | Reverse routing group object ID | - -###### 5.2.2.1.2. Reverse routing group entry - -The reverse routing table is essentially a LPM lookup table with each entry takes the IP prefix as key: - -| SAI entry field | Type | Description | -| --------------- | ---- | ----------- | -| outbound_reverse_routing_group_id | sai_object_id_t | SAI object ID of the reverse routing table | -| source | sai_ip_prefix_t | Source IP prefix | - -The attributes will only have action and reverse tunnel id, as it won't change anything else: - -| SAI attribute name | Type | Description | -| ------------------ | ---- | ----------- | -| SAI_OUTBOUND_REVERSE_ROUTE_ENTRY_ATTR_ACTION | sai_outbound_reverse_route_entry_action_t | Action to take | -| SAI_OUTBOUND_REVERSE_ROUTE_ENTRY_ATTR_REVERSE_TUNNEL_ID | sai_object_id_t | SAI object ID of the reverse tunnel | -| SAI_OUTBOUND_REVERSE_ROUTING_ENTRY_ATTR_ROUTING_ACTIONS_DISABLED_IN_FLOW_RESIMULATION | sai_uint64_t | Routing actions that need to be disabled in flow resimulation. | - -##### 5.2.2.2. 
Reverse tunnel table and entry - -Besides the reverse routing stage, we also need to split the tunnel table into tunnel and reverse tunnel table. It makes the API clean, also allows P4 to support it, because each P4 table can be only matched once in the pipeline: - -The reverse tunnel table will have the following attributes that is common to all next hops: - -| Attribute name | Type | Description | -| --- | --- | --- | -| SAI_DASH_REVERSE_TUNNEL_ATTR_DASH_ENCAPSULATION | sai_dash_encapsulation_t | Encapsulation type, such as VxLan, NvGRE. Optional. If not specified, the encap from tunnel will be used. | -| SAI_DASH_REVERSE_TUNNEL_ATTR_TUNNEL_KEY | sai_uint32_t | Tunnel key used in the encap, e.g. VNI in VxLAN. | - -A reverse tunnel supports multiple destination IPs as an ECMP group, the reverse tunnel member and reverse tunnel next hop objects will be used to specify these information: - -- The reverse tunnel next hop object defines the tunnel information for each destination: - - | Attribute name | Type | Description | - | --- | --- | --- | - | SAI_DASH_REVERSE_TUNNEL_NEXT_HOP_ATTR_DIP | sai_ip_address_t | Destination IP used in tunnel. | - | SAI_DASH_REVERSE_TUNNEL_NEXT_HOP_ATTR_SIP | sai_ip_address_t | Source IP used in tunnel. | - -- The reverse tunnel member defines the bindings between tunnel and next hop: - - | Attribute name | Type | Description | - | --- | --- | --- | - | SAI_DASH_REVERSE_TUNNEL_MEMBER_ATTR_TUNNEL_ID | sai_object_id_t | Tunnel Id | - | SAI_DASH_REVERSE_TUNNEL_MEMBER_ATTR_TUNNEL_NEXT_HOP_ID | sai_object_id_t | Tunnel next hop id | +| SAI_ENI_ATTR_REVERSE_TUNNEL_SIP | sai_ip_address_t | Source IP used in the reverse tunnel. | +| SAI_ENI_ATTR_ENABLE_REVERSE_TUNNEL_LEARNING | bool | If true, the reverse tunnel will be learned from the first packet. | #### 5.2.3. MSEE failover handling using flow resimulation ##### 5.2.3.1. Reverse tunnel updates -When MSEE failover, the tunnel configuration will be updated. 
Since the tunnel is route based, we can leverage the existing [flow resimulation APIs](../dataplane/dash-flow-resimulation.md) to update the nexthop list. +When an MSEE fails over, it could impact all flows that have been created. In this case, we will leverage the full flow resimulation API in the existing [flow resimulation APIs](../dataplane/dash-flow-resimulation.md) to help us keep the reverse tunnel updated. -Whenever the reverse tunnel id is set, we consider the reverse tunnel routing action bit is set in the pipeline, this allows us to control the flow resimulation behavior using the same mechanism. +Following the flow resimulation design, all flow resimulation requests will be explicitly issued by the SDN controller. Hence, when an MSEE fails over, the SDN controller will request a full flow resimulation that marks all flows as needing resimulation. -1. Step 1: Update the DIP list in the reverse tunnel object, which will cover all the new flows. -2. Step 2: Request a full flow resimulation, which will cover the existing flows. +When the next packet from MSEE comes in for a flow, the flow will be resimulated and the reverse tunnel will be updated accordingly, by picking up the new MSEE tunnel information. ##### 5.2.3.2. Maintaining per connection consistency (PCC) @@ -236,15 +173,15 @@ For more information, please refer to the [flow resimulation scope control APIs] ##### 5.2.3.3. Flow resimulation on return path -In flow resimulation, flow is usually updated when packets lands on the forwarding path, however this introduces extra downtime for the reverse tunnel update. The reason is that the return packet will still take the old tunnel in the flow, and being sent to the wrong destination, although the policy is updated and flow resimulation is triggered. +As we can see, in flow resimulation, a flow is updated when a packet lands on the forwarding path, which introduces extra downtime for the reverse tunnel update. 
The reason is that the return packet will still take the old tunnel in the flow, and be sent to the wrong destination, until the next MSEE-to-PLS packet comes in and updates the flow. -To avoid this impact, it is required for the return packet to check the resimulation status and update the reverse tunnel in the flow if needed. This means when a packet coming from PLS to MSEE, if reverse tunnel is changed, the reverse routing stage should be evaluated and updating the flow accordingly. +For now, this behavior will be acceptable, as the downtime is limited to the time between the MSEE failover and the next packet from MSEE. However, this behavior will be improved in the future. ##### 5.2.3.4. Flow resimulation on flow redirected flows Another thing in flow resimulation is [load balancer fast path flow redirection](../load-bal-service/fast-path-icmp-flow-redirection.md) related. When a flow is redirected by fast path ICMP packet, this flow will be ignored in the flow resimulation. However, this behavior should only apply for the forwarding side of transformation, but not the reverse side. -This means when a packet lands on DASH pipeline and it belongs to a flow that is redirected by fast path ICMP packet, the reverse routing stage should be evaluated and updating the flow accordingly. +This means when a packet lands on the DASH pipeline and it belongs to a flow that is redirected by a fast path ICMP packet, the reverse tunnel should still be evaluated and updated accordingly. ## 6. References