Load balancing configuration

I have three ISPs with bandwidths of Ethernet 1: 130 Mbps, Ethernet 2: 20 Mbps, and Ethernet 16: 10 Mbps respectively. I have configured these connections on a MikroTik router; however, the overall internet performance is very slow. Despite having a 130 Mbps link, I am only able to utilize around 40 Mbps from it, while the 20 Mbps and 10 Mbps links are being fully utilized. Below is the configuration I have applied.
Please help me; I am new to this MikroTik forum.

Config from router:

#
#
# model = CCR2004-16G-2S+

/disk
add media-interface=none media-sharing=no slot=tmp1 tmpfs-max-size=0 type=\
    tmpfs
/interface bridge
add name=bridge1
/interface ethernet
set [ find default-name=ether1 ] name="ether1 NANO WAN"
set [ find default-name=ether2 ] name="ether2 B-Mobile WAN" rx-flow-control=\
    auto tx-flow-control=auto
set [ find default-name=ether16 ] name="ether16 Tashi "
/interface list
add name=WAN
add name=LAN
/port
set 0 name=serial0
/queue type
add kind=pcq name="download 5M" pcq-classifier=dst-address pcq-limit=5120KiB \
    pcq-rate=5120 pcq-total-limit=5120KiB
add kind=pcq name="Upload 5M" pcq-classifier=src-address pcq-limit=5120KiB \
    pcq-rate=5M pcq-total-limit=5120KiB
/interface bridge port
add bridge=bridge1 interface=ether3
add bridge=bridge1 interface=ether4
add bridge=bridge1 interface=ether5
add bridge=bridge1 interface=ether6
add bridge=bridge1 interface=ether7
add bridge=bridge1 interface=ether8
add bridge=bridge1 interface=ether9
add bridge=bridge1 interface=ether10
add bridge=bridge1 interface=ether11
add bridge=bridge1 interface=ether12
add bridge=bridge1 interface=ether13
add bridge=bridge1 interface=ether14
add bridge=bridge1 interface=ether15
/ip firewall connection tracking
set enabled=yes
/ip neighbor discovery-settings
set discover-interface-list=all
/interface list member
add interface="ether1 NANO WAN" list=WAN
add interface=bridge1 list=LAN
add interface="ether2 B-Mobile WAN" list=WAN
add interface="ether16 Tashi " list=WAN
/ip address
add address=X.X.X.X/30 interface="ether1 NANO WAN" network=\
    103.80.110.16
add address=50.50.50.1/24 interface=bridge1 network=50.50.50.0
add address=X.X.X.x/30 interface="ether2 B-Mobile WAN" network=\
    X.X.X.X.X
add address=X.X.X.X/27 interface="ether16 Tashi " network=X.X.X.X
/ip cloud
set ddns-enabled=yes
/ip cloud advanced
set use-local-address=yes
/ip dhcp-client
add disabled=yes interface="ether1 NANO WAN"
/ip dhcp-server
add address-pool=*5 disabled=yes interface=bridge1 name=dhcp1
/ip dhcp-server network
add address=50.50.50.0/24 dns-server=8.8.8.8,8.8.4.4 gateway=50.50.50.1
/ip dns
set allow-remote-requests=yes servers="8.8.8.8,202.144.128.241,X.X.X.X,2\
    02.144.128.205,103.80.109.90,8.8.4.4"
/ip firewall filter
add action=fasttrack-connection chain=forward connection-state=\
    established,related,new hw-offload=yes
add action=accept chain=forward connection-state=established,related,new
/ip firewall mangle
add action=mark-connection chain=input comment=\
    "B-Mobile and Nano Mark Connection" in-interface="ether1 NANO WAN" \
    new-connection-mark="Nano Mark Connection  1" passthrough=yes
add action=mark-connection chain=input in-interface="ether2 B-Mobile WAN" \
    new-connection-mark="B-Mobile  Mark Connection 2" passthrough=yes
add action=mark-connection chain=input in-interface="ether16 Tashi " \
    new-connection-mark="Tashi  Mark Connection 3" passthrough=yes
add action=mark-routing chain=output comment=\
    "B-Mobile and Nano Mark Routing " connection-mark=\
    "Nano Mark Connection  1" new-routing-mark=main passthrough=yes
add action=mark-routing chain=output connection-mark=\
    "B-Mobile  Mark Connection 2" new-routing-mark=main passthrough=yes
add action=mark-routing chain=output connection-mark=\
    "Tashi  Mark Connection 3" new-routing-mark=main passthrough=yes

add action=accept chain=prerouting comment="B-Mobile and Nano Prerouting " \
    dst-address=X.X.X.X/30 in-interface=bridge1
add action=accept chain=prerouting dst-address=X.X.X.X/30 in-interface=\
    bridge1
add action=accept chain=prerouting dst-address=X.X.X.X/27 in-interface=\
    bridge1

add action=mark-routing chain=prerouting comment=\
    "B-Mobile and Nano Mark Prerouting" connection-mark=\
    "Nano Mark Connection  1" in-interface=bridge1 new-routing-mark=main \
    passthrough=yes
add action=mark-routing chain=prerouting connection-mark=\
    "B-Mobile  Mark Connection 2" in-interface=bridge1 new-routing-mark=main \
    passthrough=yes
add action=mark-routing chain=prerouting connection-mark=\
    "Tashi  Mark Connection 3" in-interface=bridge1 new-routing-mark=main \
    passthrough=yes
add action=mark-connection chain=prerouting comment=\
    "B-Mobile and Nano Mark Routing " dst-address-type=!local in-interface=\
    bridge1 new-connection-mark="Nano Mark Connection  1" passthrough=yes \
    per-connection-classifier=both-addresses-and-ports:3/0
add action=mark-connection chain=prerouting dst-address-type=!local \
    in-interface=bridge1 new-connection-mark="B-Mobile  Mark Connection 2" \
    passthrough=yes per-connection-classifier=both-addresses-and-ports:3/1
add action=mark-connection chain=prerouting dst-address-type=!local \
    in-interface=bridge1 new-connection-mark="Tashi  Mark Connection 3" \
    passthrough=yes per-connection-classifier=both-addresses-and-ports:3/2
/ip firewall nat
add action=masquerade chain=srcnat out-interface="ether16 Tashi "
add action=masquerade chain=srcnat out-interface="ether2 B-Mobile WAN"
add action=masquerade chain=srcnat out-interface="ether1 NANO WAN"
/ip proxy
set anonymous=yes cache-on-disk=yes cache-path=tmp1 max-client-connections=\
    1000 max-fresh-time=8w4d max-server-connections=1000
/ip route
add disabled=no distance=1 dst-address=0.0.0.0/0 gateway=X.X.X.X \
    routing-table=main scope=255 suppress-hw-offload=no target-scope=10
add check-gateway=ping disabled=no distance=1 dst-address=0.0.0.0/0 gateway=\
    X.X.X.X routing-table=main scope=255 suppress-hw-offload=no \
    target-scope=10
add check-gateway=ping disabled=no distance=1 dst-address=0.0.0.0/0 gateway=\
    X.X.X.X routing-table=main scope=255 suppress-hw-offload=no \
    target-scope=10
add check-gateway=ping disabled=no distance=1 dst-address=0.0.0.0/0 gateway=\
    X.X.X.X routing-table=main scope=255 suppress-hw-offload=no \
    target-scope=10
add disabled=no distance=1 dst-address=0.0.0.0/0 gateway=X.X.X.X \
    routing-table=main scope=255 suppress-hw-offload=no target-scope=10
add check-gateway=ping disabled=no distance=1 dst-address=0.0.0.0/0 gateway=\
    X.X.X.X routing-table=main scope=255 suppress-hw-offload=no \
    target-scope=10
/ip upnp
set allow-disable-external-interface=yes enabled=yes
/routing settings
set single-process=yes
/system identity
set name="E-Net Info Limited .com"
/system note
set show-at-login=no
/system routerboard settings
set enter-setup-on=delete-key
/tool romon
set enabled=yes

OK, can you PLEASE change your post ?

  • a title is a TITLE, not something where you can post the complete problem description (you don’t have any space for that either, as you can see)
  • Your description is incomplete (because you used the title section). I have indicated the section where it should be included.
  • it is recommended to include config code between proper quotes for easier reading (fixed that for you)
  • You don’t specify which connection is linked to which ISP speed
  • …

But it would be better if you’d used 3 backticks instead of 1 for syntax highlighting.

I did use 3 backticks, the < / > icon in the edit window.
Even went back to look for another post with syntax highlighting, copied the backticks and they are the same :thinking:

EDIT: Ah, found it … you need the 3 backticks AND the word “routeros” so it knows which syntax highlighting to use.
Learned something new today :grinning_face:

2 Likes

The suspense is killing me …

… or 4 backticks, easier.

Exactly my first thoughts…

Currently, although you have some PCC mangle rules as well as some mark-routing mangle rules, all routing marks are set to main which means all connections use the main routing table (and you don’t have any other table anyway).

In the main table you have your WAN routes (with dst-address=0.0.0.0/0) with the same distance (distance=1), which means you are currently “load balancing” using ECMP, with the default hashing policy being L3.

With ECMP and multipath-hash-policy=l3, for packets going out to the internet, their src-address and dst-address will be used to calculate a hash number, then depending on this value (it probably performs a modulo on the number of ECMP routes with shortest distance currently satisfying the destination address), the packets will be using one of those ECMP routes. Which means packets with the same pair of src-address and dst-address will all go out through the same route. If one device in the LAN (one src-address) makes connections to one server on the internet (a single dst-address), all those connections will use the same ISP. This is similar to the PCC mode both-addresses.

If you change the hashing policy to L4 (/ip settings set ipv4-multipath-hash-policy=l4 and /ipv6 settings set multipath-hash-policy=l4), then similar to the PCC mode both-addresses-and-ports, the four values of src-address, src-port, dst-address, dst-port will be used to calculate the hash number. If one device in LAN makes connections to a single server on the internet, it can still result in multiple different hash numbers, due to the src-port of the connections being different. Which means those connections might be distributed between the ECMP routes and use multiple ISPs.

However, regardless of the L3 or L4 ECMP hashing mode, if there are enough different src-address and dst-address pairs (or 4-tuples in the case of L4), the connections will tend to be equally distributed between the ECMP routes, which means each of your 3 ISP lines will carry approximately the same number of connections, independent of the available throughput. As a result, the slower ISP lines might be overloaded while the fastest one might not use its full available capacity.

If you want a fairer distribution of the connections across the ISPs, you probably need to resort to the PCC mangle rules. You’ll need to:

  1. Create three additional routing tables for ISP #1 (130 Mbps), ISP #2 (20 Mbps) and ISP #3 (10 Mbps), each with the corresponding default route (dst-address=0.0.0.0/0). Name the tables USE_ISP1, USE_ISP2, USE_ISP3 or something like that.

  2. Change these first 6 rules of your Mangles table:

    /ip firewall mangle
    add action=mark-connection chain=input comment=\
        "B-Mobile and Nano Mark Connection" in-interface="ether1 NANO WAN" \
        new-connection-mark="Nano Mark Connection  1" passthrough=yes
    add action=mark-connection chain=input in-interface="ether2 B-Mobile WAN" \
        new-connection-mark="B-Mobile  Mark Connection 2" passthrough=yes
    add action=mark-connection chain=input in-interface="ether16 Tashi " \
        new-connection-mark="Tashi  Mark Connection 3" passthrough=yes
    add action=mark-routing chain=output comment=\
        "B-Mobile and Nano Mark Routing " connection-mark=\
        "Nano Mark Connection  1" new-routing-mark=main passthrough=yes
    add action=mark-routing chain=output connection-mark=\
        "B-Mobile  Mark Connection 2" new-routing-mark=main passthrough=yes
    add action=mark-routing chain=output connection-mark=\
        "Tashi  Mark Connection 3" new-routing-mark=main passthrough=yes
    

    to this:

    /ip firewall mangle
    add action=mark-connection chain=input comment=\
        "B-Mobile and Nano Mark Connection" in-interface="ether1 NANO WAN" \
        connection-mark=no-mark connection-state=new \
        new-connection-mark="Nano Mark Connection 1" passthrough=yes
    add action=mark-connection chain=input in-interface="ether2 B-Mobile WAN" \
        connection-mark=no-mark connection-state=new \
        new-connection-mark="B-Mobile Mark Connection 2" passthrough=yes
    add action=mark-connection chain=input in-interface="ether16 Tashi " \
        connection-mark=no-mark connection-state=new \
        new-connection-mark="Tashi Mark Connection 3" passthrough=yes
    add action=mark-routing chain=output comment=\
        "B-Mobile and Nano Mark Routing" connection-mark=\
        "Nano Mark Connection 1" new-routing-mark=USE_ISP1 passthrough=no
    add action=mark-routing chain=output connection-mark=\
        "B-Mobile Mark Connection 2" new-routing-mark=USE_ISP2 passthrough=no
    add action=mark-routing chain=output connection-mark=\
        "Tashi Mark Connection 3" new-routing-mark=USE_ISP3 passthrough=no
    

    (fixed some double spaces in the connection mark names, added connection-mark=no-mark connection-state=new to the mark-connection rules, set passthrough=no for the mark-routing rules, and set the correct routing mark for those rules)

  3. Keep the current next 3 accept rules without modifications:

    /ip firewall mangle
    add action=accept chain=prerouting comment="B-Mobile and Nano Prerouting " \
        dst-address=X.X.X.X/30 in-interface=bridge1
    add action=accept chain=prerouting dst-address=X.X.X.X/30 in-interface=\
        bridge1
    add action=accept chain=prerouting dst-address=X.X.X.X/27 in-interface=\
        bridge1
    
  4. Remove the rest of the current rules at the bottom of the Mangles table and replace them with the rules from steps 5 and 6 below.

  5. Add these three PCC rules:

    /ip firewall mangle
    add action=mark-connection chain=prerouting dst-address-type=!local \
        in-interface=bridge1 connection-mark=no-mark connection-state=new \
        new-connection-mark="B-Mobile Mark Connection 2" passthrough=yes \
        per-connection-classifier=both-addresses-and-ports:8/3
    add action=mark-connection chain=prerouting dst-address-type=!local \
        in-interface=bridge1 connection-mark=no-mark connection-state=new \
        new-connection-mark="Tashi Mark Connection 3" passthrough=yes \
        per-connection-classifier=both-addresses-and-ports:16/0
    add action=mark-connection chain=prerouting \
        comment="B-Mobile and Nano Mark Routing " dst-address-type=!local \
        in-interface=bridge1 connection-mark=no-mark connection-state=new \
        new-connection-mark="Nano Mark Connection 1" passthrough=yes
    

    Please note that connection-mark=no-mark connection-state=new has been added to the three rules. Also, the mark-connection rule for the 1st ISP (the one with 130 Mbps) has been moved down below the two other rules and has no per-connection-classifier condition! Double spaces in the connection mark names have been corrected.

    • The rule for ISP 2 has a PCC denominator of 8, which means 1/8 of the connections should land here (20 Mbps / (130 + 20 + 10) Mbps). The remainder of 3 is an odd number, so it will not clash with the next rule (which matches remainder 0).

    • The rule for ISP 3 has a PCC denominator of 16. Because it has 1/16 of the throughput (10 Mbps/160 Mbps), the chosen remainder is 0.

    • After these two rules, all of the not yet marked new outgoing connections will be marked for ISP 1, which would get about 13/16 of the connections (1 - 1/8 - 1/16)

  6. Add the corresponding mark-routing rules:

    /ip firewall mangle
    add action=mark-routing chain=prerouting comment=\
        "B-Mobile and Nano Mark Prerouting" connection-mark=\
        "Nano Mark Connection 1" in-interface=bridge1 new-routing-mark=USE_ISP1 \
        passthrough=no
    add action=mark-routing chain=prerouting connection-mark=\
        "B-Mobile Mark Connection 2" in-interface=bridge1 new-routing-mark=USE_ISP2 \
        passthrough=no
    add action=mark-routing chain=prerouting connection-mark=\
        "Tashi Mark Connection 3" in-interface=bridge1 new-routing-mark=USE_ISP3 \
        passthrough=no
    

    (fixed double spaces, use correct routing marks, and set passthrough=no).

With those changes, the number of connections should be better distributed between the three ISP lines. But it depends on the actual ports and IP addresses. Furthermore, some connections might need to transfer a lot more data than others, but they might be put in the slowest ISP route after the hashes have been calculated. So the bandwidth usage can still be unfairly distributed.

To be honest, with such disparity in available throughput (130 Mbps vs 20 & 10 Mbps) I would NOT bother with load balancing at all and would have most of the connections use the 130 Mbps line. The two others will be used as failover and you can maybe route some light traffic like DNS, NTP, etc… through them.

Why? Let’s say you use the PCC configuration above where theoretically 13/16 of the number of connections would use the 130 Mbps line, 2/16 use the 2nd ISP and 1/16 use the 3rd ISP. But you can then have a situation where you try to download a single multi-GB resource from a remote server and the calculated hash for that connection ends up putting you on the 10 Mbps route, making the download 13x slower than if you hadn’t used load balancing at all (just using ISP1 as main and the other two as failover makes sure that the download always uses ISP1).

Thanks, I will try to configure it and update you accordingly. However, I am a bit confused regarding your instruction where you mentioned “both-addresses-and-ports:8/3” and “both-addresses-and-ports:16/0”.

Could you kindly elaborate a bit more on what exactly these values refer to? A little clarification would be greatly appreciated.

Also, just to give you more context, we have over 180 users in the office.

If you have a video link or tutorial, that would also greatly help us in understanding the configuration better and resolving the issue more efficiently.

Looking forward to your guidance. Thanks once again!

Let’s see if I can be of help.

The “separation” is made by difference: first you tell the router what it should send to the slower interfaces, and what remains goes to the faster one.
You have (reversed):

  1. 10
  2. 20
  3. 130

The total is 10+20+130=160, then:

  1. 10/160= 1/16
  2. 20/160= 2/16=1/8
  3. 130/160= 13/16 the rest

PCC (per connection classifier) docs:

You essentially divide the total with the first number:
Example 1, two connections, each 50% or 1/2:

  1. per-connection-classifier=both-addresses-and-ports:2/0
  2. per-connection-classifier=both-addresses-and-ports:2/1

Example 2, three connections, each 33% or 1/3:

  1. per-connection-classifier=both-addresses-and-ports:3/0
  2. per-connection-classifier=both-addresses-and-ports:3/1
  3. per-connection-classifier=both-addresses-and-ports:3/2

BUT also:

  1. per-connection-classifier=both-addresses-and-ports:3/0
  2. per-connection-classifier=both-addresses-and-ports:3/1
  3. (nothing as it is implied that what is not caught by the two above will use the remaining)

The second numbers, the remainders (the 3 and the 0 in CGGXANNX’s suggestion), can be “random” as long as they are not duplicated.

See also this:

1 Like

Hi, if you have that many users then you can go ahead and use PCC to take advantage of all three WANs.

As for the meaning of “both-addresses-and-ports:8/3” and “both-addresses-and-ports:16/0”:

Like you can see, they are used as value for the condition named per-connection-classifier. It’s a matching condition, same as in-interface=bridge1 is a matching condition that needs to be satisfied for the action of the rule to be executed. With in-interface=bridge1 the condition is met only if the incoming interface of the packet was bridge1 otherwise the rule is skipped.

With the condition per-connection-classifier=both-addresses-and-ports:16/0 the following will be done:

  • Some deterministic calculation (probably consisting of some multiplication/addition/modulo/bit shifting/xor-ing/etc… operations) will be performed on all 4 properties of the packet src-address, src-port, dst-address, dst-port (because the classifier was both-addresses-and-ports, with a both-addresses classifier for example, then the ports will be ignored and not part of the calculation). This calculation (hashing) will produce a 32-bit number between 0 and 4294967295. The hashing algorithm is deterministic, so the same tuple of src-address, src-port, dst-address, dst-port values will always produce the same number.

  • Next is the 16 part: This is the value of the denominator. The 32-bit value calculated above will now be divided by this denominator value of 16 to get the remainder. This is a modulo operation that will produce one of the 16 possible whole numbers between 0 and 15 (you divide a number by 16 then the remainder can be 0, or 1, or 2, … or 15).

  • Next is the 0 part of per-connection-classifier=both-addresses-and-ports:16/0: This specifies that the condition is only considered “matched” if the remainder produced above is exactly 0. If that value is 1, or 2, … or 15 then there is no match and this rule is skipped. Which means this rule will match for about 1:16 of the produced 32-bit hash values (numbers between 0 and 4294967295) above.

  • In RouterOS connection tracking, a connection is identified by the 4 src-address, src-port, dst-address, dst-port values plus the protocol. Which means all packets of the same connection in one direction will produce the same 32-bit hash number, and as a result, the same remainder for a given denominator (16 in the above example). If you have a lot of different connections (different src-address, src-port, dst-address, dst-port tuples) and the hashing algorithm is good, then the produced 32-bit numbers will be evenly spread out on the 0 .. 4294967295 range. Which means the condition above will match about 1/16 of the connections (connections, not packets, a busy connection will have a lot more packets than an idle connection).

So, if we use that condition and then have the mark-routing action to set the routing mark for ISP3 for outgoing packets (outgoing due to the in-interface=bridge1 condition), about 1/16 of the connections will use ISP3 to go to the internet, which suits the (10 Mbps) : (160 Mbps total) ratio.

Now, for the other two ISPs, in theory we could write 15 more PCC rules:

  • 2 with per-connection-classifier=both-addresses-and-ports:16/1 and per-connection-classifier=both-addresses-and-ports:16/2 for ISP2. And they will match when the modulo by 16 produce either 1 or 2 as remainder, this effectively puts about 2/16 of the connections to go out by ISP2, matching the 20 Mbps / 160 Mbps available throughput.

  • then 13 rules with 16/3, 16/4, …, 16/14, 16/15 with action marking the connection to use ISP1, for the available 130 Mbps : 160 Mbps.

But that would be a lot of boring rules. Instead, we use per-connection-classifier=both-addresses-and-ports:8/3 for ISP2. This takes the 32-bit hash value, divides it by 8 to get the remainder, and compares the remainder with 3. If you divide a whole number by 8 and get 3 as the remainder, then that same number, if divided by 16, will produce a remainder of either 3 or 11 (= 8 + 3).

One rule with per-connection-classifier=both-addresses-and-ports:8/3 is equivalent to two rules with per-connection-classifier=both-addresses-and-ports:16/3 and per-connection-classifier=both-addresses-and-ports:16/11, and the 32-bit values it matches won’t also match per-connection-classifier=both-addresses-and-ports:16/0 of the ISP3 rule (that’s why I wrote in the previous post that we chose an odd remainder: a number that leaves an odd remainder when divided by 8 is itself odd, so it can never leave remainder 0 when divided by 16). You get the desired 2:16 ratio with only one PCC rule.

As for the 3rd rule: it replaces the 13 remaining rules that you would use for ISP1. Because after 1:16 of the connections have matched the ISP3 rule, they all have connection marks. Same with the 2:16 other connections that matched the ISP2 rule. Which means all not yet marked connections (connection-mark=no-mark) that remain are the 13:16 part that we want to match for ISP1.

Thank you. :folded_hands: :folded_hands: