100% CPU usage at random times

Hello everyone.
I have a MikroTik RB-941. At random times, the processor is loaded to 100%. The profiler shows that it is mainly console, management and spi; sometimes just one of these services loads the processor up to 100%. I have another 941 and a 951 with the same config (except for the subnets), and they do not show this behaviour. There is nothing in scripts or the scheduler. RouterOS 6.42.3.

Config:

# feb/10/2020 10:33:46 by RouterOS 6.46.3
#
# model = RB941-2nD
# Interfaces, wireless AP, DHCP and addressing.
# bridge1 is the wired LAN bridge; its ports are added under
# "/interface bridge port" below.
/interface bridge
add name=bridge1
# wlan1 is a 2.4 GHz access point (mode=ap-bridge, ssid=Inet) on a fixed
# frequency (2442). default-authentication=no and default-forwarding=no are
# set, so client admission presumably depends on the access-list entry
# further down -- confirm against the wireless documentation.
/interface wireless
set [ find default-name=wlan1 ] adaptive-noise-immunity=ap-and-client-mode \
    antenna-gain=0 band=2ghz-b/g/n country=russia default-authentication=no \
    default-forwarding=no disabled=no distance=indoors frequency=2442 \
    hw-protection-mode=rts-cts hw-retries=15 installation=indoor mode=\
    ap-bridge multicast-helper=full ssid=Inet wireless-protocol=802.11 \
    wps-mode=disabled
# ether3 is disabled.
/interface ethernet
set [ find default-name=ether3 ] disabled=yes
/interface wireless nstreme
set wlan1 enable-polling=no
# "Internet" is the interface list the firewall rules use to match the
# WAN side; its only member (ether4) is added below.
/interface list
add name=Internet
# Default security profile: WPA-PSK/WPA2-PSK with dynamic keys. No
# pre-shared key appears in this export.
/interface wireless security-profiles
set [ find default=yes ] authentication-types=wpa-psk,wpa2-psk eap-methods="" \
    mode=dynamic-keys supplicant-identity=MikroTik
# One address pool per local subnet: dhcp_pool0 for the wireless subnet,
# dhcp_pool1 for the wired bridge subnet.
/ip pool
add name=dhcp_pool0 ranges=192.168.23.15-192.168.23.200
add name=dhcp_pool1 ranges=192.168.15.15-192.168.15.200
# Separate DHCP servers on wlan1 and on bridge1.
/ip dhcp-server
add address-pool=dhcp_pool0 disabled=no interface=wlan1 name=dhcp_wlan
add address-pool=dhcp_pool1 disabled=no interface=bridge1 name=dhcp_lan
# Only ether1 and ether2 are bridge ports. wlan1 is not bridged; it has
# its own address and subnet below.
/interface bridge port
add bridge=bridge1 interface=ether1
add bridge=bridge1 interface=ether2
# NOTE(review): detect-internet is enabled for all interfaces. When
# chasing unexplained CPU load it may be worth testing with
# detect-interface-list=none -- not confirmed as the cause here.
/interface detect-internet
set detect-interface-list=all
# ether4 is the only member of the Internet list.
/interface list member
add interface=ether4 list=Internet
# Access-list entry for wlan1: matches clients whose signal is within
# -83..120, with forwarding=no for matched clients.
/interface wireless access-list
add allow-signal-out-of-range=3s forwarding=no interface=wlan1 signal-range=\
    -83..120 vlan-mode=no-tag
# Router addresses: .1 in each local /24.
/ip address
add address=192.168.15.1/24 interface=bridge1 network=192.168.15.0
add address=192.168.23.1/24 interface=wlan1 network=192.168.23.0
# The ether4 (WAN) address is obtained via DHCP.
/ip dhcp-client
add disabled=no interface=ether4
# Both local subnets are handed the same external DNS server and the
# router's own address as gateway.
/ip dhcp-server network
add address=192.168.15.0/24 dns-server=193.58.251.251 gateway=192.168.15.1
add address=192.168.23.0/24 dns-server=193.58.251.251 gateway=192.168.23.1
# Filter rules. Order matters; the comment= values (1.1 ... 1.9) label the
# rule groups. Most input rules match in-interface-list=Internet (ether4).
/ip firewall filter
add action=accept chain=forward comment=\
    "1.1. Forward and Input Established and Related connections" \
    connection-state=established,related
add action=drop chain=forward connection-state=invalid
add action=accept chain=input connection-state=established,related
add action=drop chain=input connection-state=invalid
# Drop new forwarded connections arriving from the Internet list unless
# they were destination-NATed.
add action=drop chain=forward connection-nat-state=!dstnat connection-state=\
    new in-interface-list=Internet
# Sources matching connection-limit=100,32 on TCP from the Internet list
# are added to ddos-blacklist for 1d.
add action=add-src-to-address-list address-list=ddos-blacklist \
    address-list-timeout=1d chain=input comment=\
    "1.2. DDoS Protect - Connection Limit" connection-limit=100,32 \
    in-interface-list=Internet protocol=tcp
# NOTE(review): this tarpit rule has no in-interface-list match, so it is
# evaluated for TCP input from any interface -- confirm that is intended.
add action=tarpit chain=input connection-limit=3,32 protocol=tcp \
    src-address-list=ddos-blacklist
# NOTE(review): the forward-chain jump below is not restricted to the
# Internet list, so new SYNs forwarded from the local subnets also pass
# through SYN-Protect.
add action=jump chain=forward comment="1.3. DDoS Protect - SYN Flood" \
    connection-state=new jump-target=SYN-Protect protocol=tcp tcp-flags=syn
add action=jump chain=input connection-state=new in-interface-list=Internet \
    jump-target=SYN-Protect protocol=tcp tcp-flags=syn
# SYN-Protect chain: return to the calling chain while the limit matcher
# (limit=200,5:packet) still matches; any other new SYN is dropped.
add action=return chain=SYN-Protect connection-state=new limit=200,5:packet \
    protocol=tcp tcp-flags=syn
add action=drop chain=SYN-Protect connection-state=new protocol=tcp \
    tcp-flags=syn
# Port-scan handling: sources already in "Port Scanners" are dropped first,
# then the psd matcher adds newly detected sources to the list.
# NOTE(review): address-list-timeout=none-dynamic presumably means entries
# are not removed by a timeout -- confirm, and check how large this list
# grows on the affected unit.
add action=drop chain=input comment="1.4. Protected - Ports Scanners" \
    src-address-list="Port Scanners"
add action=add-src-to-address-list address-list="Port Scanners" \
    address-list-timeout=none-dynamic chain=input in-interface-list=Internet \
    protocol=tcp psd=21,3s,3,1
# WinBox (tcp/8291) staging lists. The rules are ordered from the last
# stage to the first: a new connection from a source already in
# "Winbox Stage N" adds it to the next list, and a source in
# "Winbox Stage 3" is added to "Black List Winbox" (logged). All of these
# lists use a 1m timeout. Sources in "Black List Winbox" are dropped by
# the first rule of this group.
add action=drop chain=input comment="1.5. Protected - WinBox Access" \
    src-address-list="Black List Winbox"
add action=add-src-to-address-list address-list="Black List Winbox" \
    address-list-timeout=1m chain=input connection-state=new dst-port=8291 \
    in-interface-list=Internet log=yes log-prefix="BLACK WINBOX" protocol=tcp \
    src-address-list="Winbox Stage 3"
add action=add-src-to-address-list address-list="Winbox Stage 3" \
    address-list-timeout=1m chain=input connection-state=new dst-port=8291 \
    in-interface-list=Internet protocol=tcp src-address-list="Winbox Stage 2"
add action=add-src-to-address-list address-list="Winbox Stage 2" \
    address-list-timeout=1m chain=input connection-state=new dst-port=8291 \
    in-interface-list=Internet protocol=tcp src-address-list="Winbox Stage 1"
add action=add-src-to-address-list address-list="Winbox Stage 1" \
    address-list-timeout=1m chain=input connection-state=new dst-port=8291 \
    in-interface-list=Internet protocol=tcp
# WinBox is accepted from the Internet list for sources not dropped above.
add action=accept chain=input dst-port=8291 in-interface-list=Internet \
    protocol=tcp
# Rate-limited ICMP from the Internet list is accepted (limit=50/5s,2:packet).
add action=accept chain=input comment="1.8. Access Normal Ping" \
    in-interface-list=Internet limit=50/5s,2:packet protocol=icmp
# Final rule: drop all remaining input arriving from the Internet list.
add action=drop chain=input comment="1.9. Drop All Other" in-interface-list=\
    Internet
# Source NAT: masquerade traffic sourced from either local subnet.
# NOTE(review): neither rule has an out-interface match, so the masquerade
# is not limited to traffic leaving via ether4 -- confirm that is intended.
/ip firewall nat
add action=masquerade chain=srcnat src-address=192.168.15.0/24
add action=masquerade chain=srcnat src-address=192.168.23.0/24
# Raw prerouting: drop UDP to ports 137-139 arriving from the Internet list.
/ip firewall raw
add action=drop chain=prerouting dst-port=137,138,139 in-interface-list=\
    Internet protocol=udp
# Management services: telnet, ftp, www, ssh, api and api-ssl are disabled.
# winbox is not listed in this section.
/ip service
set telnet disabled=yes
set ftp disabled=yes
set www disabled=yes
set ssh disabled=yes
set api disabled=yes
set api-ssl disabled=yes
/system clock
set time-zone-name=Europe/Moscow
/system identity
set name=MikroTik_Right_2
# NOTE(review): only server-dns-names is set here and no enabled=yes
# appears in this export -- confirm the NTP client is actually enabled.
/system ntp client
set server-dns-names=pool.ntp.org
# Watchdog pings 10.77.50.1, starting 30m after boot.
# NOTE(review): that address is outside both local subnets configured in
# this export, so it is presumably reached through the ether4 uplink --
# confirm it is reliably reachable from this unit.
/system watchdog
set ping-start-after-boot=30m watch-address=10.77.50.1
# Interface graphing is enabled with store-on-disk=no.
/tool graphing interface
add store-on-disk=no
# RoMON is enabled with a fixed id; the default port entry is set to
# forbid=yes and an explicit port entry is added for ether4.
/tool romon
set enabled=yes id=00:00:00:00:11:02
/tool romon port
set [ find default=yes ] forbid=yes
add disabled=no interface=ether4
# Packet sniffer filter is preset to ether4.
/tool sniffer
set filter-interface=ether4

We have a very similar situation with the same model in one deployment; all the other RB941-2nD units we have deployed are working fine with the same config.

The problematic RB941-2nD seems to spike when the hour changes (see the attached Zabbix graph).

Also, when running the export command in the console, the CPU goes to 100% and the export is quite slow (see the attached profiler screenshot taken while running export in the terminal).
Screenshot 2020-02-24 at 09.48.48.png
Screenshot 2020-02-24 at 09.50.57.png

I am having exactly the same issue. I hope someone will be able to shed some light on this issue.

Thanks in advance.

Hi! We have noticed issues like the ones you describe on several RB2011 boards.
In our case the process responsible for the CPU consumption was “disk”, particularly when opening the Files section in WinBox or when running the export command in the terminal.
Available disk space was between 30% and 40%. It happened regardless of the RouterOS version: 6.36, 6.47.7 and 6.47.8.
The only way to get rid of it was to do a Netinstall.
I hope this helps shed some light on this enigma.
Kind regards!

This is a widespread problem on older models, but the MikroTik team does not want to solve it :(
http://forum.mikrotik.com/t/rb751-u-2nhd-100-cpu/51701/1
http://forum.mikrotik.com/t/rb751-cpu-usage-get-too-high/54092/1

Hi, we see almost the same thing on our MikroTik CCR 1072-1G-8SFP+: we also experience 100% CPU, but during peak hours. We have 1 Gbps of bandwidth and run a PPPoE server with 600 clients.

Hi, have you tried using Splunk to monitor your setup? I have made a Splunk program for MikroTik, and it has a section for PPPoE.
I am interested in feedback on it. See the link in my signature.

I have 5 x 1G connections on a CCR 1036. We mitigated this issue by separating the PPPoE service and disabling connection tracking, and we have another 1036 doing solely the NAT. This setup has 1K users at peak; the CPU usage on the CCR doing the NAT is around 40%, and the bandwidth on each link is around 750 Mbps on average.

Just my $0.02

Hi Jotne, we can try this monitoring tool, thanks.

Thanks loloski, this is actually the last option we were thinking of trying. We already tried PCQ to reduce the number of parent queues, since the test results say there must be fewer than 25 queues
so that the bandwidth is not divided, but this did not work, so I think we will go with this last option. Thank you!

By the way, our NAT is also on another appliance, so our 1072 runs only the PPPoE service with simple queues. Do you have any suggestions regarding our queues?