selfcheck.py: add hardware health checks (#192)

* selfcheck: Add hardware health checks

* docs: Update selfcheck documentation (en/ru)

* selfcheck: "Hardware health" -> "RPi health"
This commit is contained in:
Alexey Rogachevskiy
2019-11-25 20:51:27 +03:00
committed by Oleg Kalachev
parent 7dc82e8154
commit 4a67783706
3 changed files with 42 additions and 2 deletions

View File

@@ -715,6 +715,41 @@ def check_network():
failure('Not found %s in /etc/hosts, ROS will malfunction if network interfaces are down, https://clever.coex.tech/hostname', ros_hostname)
@check('RPi health')
def check_rpi_health():
    """Report power and thermal problems flagged by the Raspberry Pi firmware.

    Runs `vcgencmd get_throttled`, parses the returned bit mask and reports
    a failure for every condition that is active now or has occurred
    earlier. If the tool cannot be run, exits with an error, or prints
    something unexpected, a single failure is reported and the check stops.
    """
    # `vcgencmd get_throttled` output codes taken from
    # https://github.com/raspberrypi/documentation/blob/JamesH65-patch-vcgencmd-vcdbg-docs/raspbian/applications/vcgencmd.md#get_throttled
    # TODO: support more base platforms?
    FLAG_UNDERVOLTAGE_NOW = 0x1
    FLAG_FREQ_CAP_NOW = 0x2
    FLAG_THROTTLING_NOW = 0x4
    FLAG_THERMAL_LIMIT_NOW = 0x8
    FLAG_UNDERVOLTAGE_OCCURRED = 0x10000
    FLAG_FREQ_CAP_OCCURRED = 0x20000
    FLAG_THROTTLING_OCCURRED = 0x40000
    FLAG_THERMAL_LIMIT_OCCURRED = 0x80000

    try:
        # vcgencmd outputs a single string in a form of
        # <parameter>=<value>
        # In case of `get_throttled`, <value> is a hexadecimal number
        # with some of the FLAGs OR'ed together
        raw_output = subprocess.check_output(['vcgencmd', 'get_throttled'])
    except OSError:
        failure('Could not call vcgencmd binary; not a Raspberry Pi?')
        return
    except subprocess.CalledProcessError as e:
        # The binary exists but refused to answer (e.g. no access to the
        # VideoCore device); report it instead of crashing the check.
        failure('vcgencmd get_throttled exited with code %s', e.returncode)
        return

    # check_output returns bytes on Python 3; decode so that the text
    # handling below works the same on Python 2 and 3.
    output = raw_output.decode('ascii', 'replace')
    try:
        # partition() never raises; a missing '=' yields an empty value,
        # which int() then rejects with ValueError.
        throttle_mask = int(output.partition('=')[2].strip(), 16)
    except ValueError:
        failure('Unexpected vcgencmd get_throttled output: %s', output.strip())
        return

    if throttle_mask & (FLAG_THROTTLING_NOW | FLAG_THROTTLING_OCCURRED):
        failure('System throttled to prevent damage')
    if throttle_mask & (FLAG_UNDERVOLTAGE_NOW | FLAG_UNDERVOLTAGE_OCCURRED):
        failure('Not enough power for onboard computer, flight inadvisable')
    if throttle_mask & (FLAG_FREQ_CAP_NOW | FLAG_FREQ_CAP_OCCURRED):
        failure('CPU frequency reduced to avoid overheating')
    if throttle_mask & (FLAG_THERMAL_LIMIT_NOW | FLAG_THERMAL_LIMIT_OCCURRED):
        failure('CPU over soft temperature limit, expect performance loss')
def selfcheck():
check_image()
check_clever_service()
@@ -731,6 +766,7 @@ def selfcheck():
check_optical_flow()
check_vpe()
check_rangefinder()
check_rpi_health()
check_cpu_usage()
check_boot_duration()

View File

@@ -21,7 +21,9 @@ Description of some checks:
* Global position (GPS) — checks for presence of global position data (GPS module is required for this check);
* Camera — checks for proper operation of the Raspberry camera.
* ArUco — checks whether [ArUco](aruco.md) detection is working
* VPE — checks whether VPE data is published
* Rangefinder — checks whether [rangefinder](laser.md) data is published
* VPE — checks whether VPE data is published.
* Rangefinder — checks whether [rangefinder](laser.md) data is published.
* RPi health — checks the [onboard computer](raspberrypi.md) status.
* CPU usage — checks the CPU load of the onboard computer.
> **Caution** Pay attention to the checks marked with the *WARN* sign. If necessary, contact [Copter Express technical support](tg://resolve?domain=COEXHelpdesk).

View File

@@ -21,5 +21,7 @@ rosrun clever selfcheck.py
* ArUco — проверка работы [распознавания ArUco-маркеров](aruco.md).
* VPE — проверка правильности работы VPE.
* Rangefinder — проверка работы [дальномера](laser.md).
* RPi health — проверка состояния [бортового компьютера](raspberrypi.md).
* CPU usage — проверка загруженности процессора бортового компьютера.
> **Caution** Обращайте пристальное внимание на предупреждения, отмеченные строкой *WARN*. При необходимости, обращайтесь в [Техподдержку Copter Express](tg://resolve?domain=COEXHelpdesk).