git.openstreetmap.org Git - nominatim.git/commitdiff
Enhanced and refactored 'collect_os_info.py'
author Tareq Al-Ahdal <tareqoalahdal@gmail.com>
Fri, 12 Aug 2022 22:13:05 +0000 (06:13 +0800)
committer Tareq Al-Ahdal <tareqoalahdal@gmail.com>
Fri, 12 Aug 2022 22:13:05 +0000 (06:13 +0800)
Changed the script to a functional programming paradigm to remove the large number of local attributes and decrease memory usage when running it. Additional OS information is now included.

nominatim/tools/collect_os_info.py [new file with mode: 0644]
utils/collect_os_info.py [deleted file]
utils/collect_os_info.sh [deleted file]

diff --git a/nominatim/tools/collect_os_info.py b/nominatim/tools/collect_os_info.py
new file mode 100644 (file)
index 0000000..9d76f22
--- /dev/null
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Collection of host system information including software versions, memory,
+storage, and database configuration.
+"""
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import List, Optional, Tuple, Union, cast
+
+import psutil
+from psycopg2.extensions import make_dsn, parse_dsn
+
+from nominatim.config import Configuration
+from nominatim.db.connection import connect
+from nominatim.typing import DictCursorResults
+from nominatim.version import version_str
+
+
+def convert_version(ver_tup: Tuple[int, int]) -> str:
+    """converts tuple version (ver_tup) to a string representation"""
+    return ".".join(map(str, ver_tup))
+
+
+def friendly_memory_string(mem: float) -> str:
+    """Create a user friendly string for the amount of memory specified as mem"""
+    mem_magnitude = ("bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
+    mag = 0
+    # determine order of magnitude
+    while mem > 1000:
+        mem /= 1000
+        mag += 1
+
+    return f"{mem:.1f} {mem_magnitude[mag]}"
+
+
+def run_command(cmd: Union[str, List[str]]) -> str:
+    """Runs a command using the shell and returns the output from stdout"""
+    try:
+        if sys.version_info < (3, 7):
+            cap_out = subprocess.run(cmd, stdout=subprocess.PIPE, check=False)
+        else:
+            cap_out = subprocess.run(cmd, capture_output=True, check=False)
+        return cap_out.stdout.decode("utf-8")
+    except FileNotFoundError:
+        # non-Linux system should end up here
+        return f"Unknown (unable to find the '{cmd}' command)"
+
+
+def os_name_info() -> str:
+    """Obtain Operating System Name (and possibly the version)"""
+    os_info = None
+    # man page os-release(5) details meaning of the fields
+    if Path("/etc/os-release").is_file():
+        os_info = from_file_find_line_portion(
+            "/etc/os-release", "PRETTY_NAME", "=")
+    # alternative location
+    elif Path("/usr/lib/os-release").is_file():
+        os_info = from_file_find_line_portion(
+            "/usr/lib/os-release", "PRETTY_NAME", "="
+        )
+
+    # fallback on Python's os name
+    if os_info is None or os_info == "":
+        os_info = os.name
+
+    # if the above is insufficient, take a look at neofetch's approach to OS detection
+    return os_info
+
+
+# Note: Intended to be used on informational files like /proc
+def from_file_find_line_portion(
+    filename: str, start: str, sep: str, fieldnum: int = 1
+) -> Optional[str]:
+    """open filename, finds the line starting with the 'start' string.
+    Splits the line using separator and returns a "fieldnum" from the split."""
+    with open(filename, encoding='utf8') as file:
+        result = ""
+        for line in file:
+            if line.startswith(start):
+                result = line.split(sep)[fieldnum].strip()
+        return result
+
+
+def get_postgresql_config(version: int) -> str:
+    """Retrieve postgres configuration file"""
+    try:
+        with open(f"/etc/postgresql/{version}/main/postgresql.conf", encoding='utf8') as file:
+            db_config = file.read()
+            file.close()
+            return db_config
+    except IOError:
+        return f"**Could not read '/etc/postgresql/{version}/main/postgresql.conf'**"
+
+
+def report_system_information(config: Configuration) -> None:
+    """Generate a report about the host system including software versions, memory,
+    storage, and database configuration."""
+
+    with connect(make_dsn(config.get_libpq_dsn(), dbname='postgres')) as conn:
+        postgresql_ver: str = convert_version(conn.server_version_tuple())
+
+        with conn.cursor() as cur:
+            cur.execute(f"""
+            SELECT datname FROM pg_catalog.pg_database 
+            WHERE datname='{parse_dsn(config.get_libpq_dsn())['dbname']}'""")
+            nominatim_db_exists = cast(Optional[DictCursorResults], cur.fetchall())
+            if nominatim_db_exists:
+                with connect(config.get_libpq_dsn()) as conn:
+                    postgis_ver: str = convert_version(conn.postgis_version_tuple())
+            else:
+                postgis_ver = "Unable to connect to database"
+
+    postgresql_config: str = get_postgresql_config(int(float(postgresql_ver)))
+
+    # Note: psutil.disk_partitions() is similar to run_command("lsblk")
+
+    # Note: run_command("systemd-detect-virt") only works on Linux, on other OSes
+    # should give a message: "Unknown (unable to find the 'systemd-detect-virt' command)"
+
+    # Generates the Markdown report.
+
+    report = f"""
+    **Instructions**
+    Use this information in your issue report at https://github.com/osm-search/Nominatim/issues
+    Redirect the output to a file:
+    $ ./collect_os_info.py > report.md
+
+
+    **Software Environment:**
+    - Python version: {sys.version}
+    - Nominatim version: {version_str()} 
+    - PostgreSQL version: {postgresql_ver} 
+    - PostGIS version: {postgis_ver}
+    - OS: {os_name_info()}
+    
+    
+    **Hardware Configuration:**
+    - RAM: {friendly_memory_string(psutil.virtual_memory().total)}
+    - number of CPUs: {psutil.cpu_count(logical=False)}
+    - bare metal/AWS/other cloud service (per systemd-detect-virt(1)): {run_command("systemd-detect-virt")} 
+    - type and size of disks:
+    **`df -h` - df - report file system disk space usage: **
+    ```
+    {run_command(["df", "-h"])}
+    ```
+    
+    **lsblk - list block devices: **
+    ```
+    {run_command("lsblk")}
+    ```
+    
+    
+    **Postgresql Configuration:**
+    ```
+    {postgresql_config}
+    ```
+    **Notes**
+    Please add any notes about anything above anything above that is incorrect.
+"""
+    print(report)
diff --git a/utils/collect_os_info.py b/utils/collect_os_info.py
deleted file mode 100644 (file)
index 0f5bf04..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-
-import os
-from pathlib import Path
-import subprocess
-import sys
-from typing import Optional, Union
-
-# external requirement
-import psutil
-
-# from nominatim.version import NOMINATIM_VERSION
-# from nominatim.db.connection import connect
-
-
-class ReportSystemInformation:
-       """Generate a report about the host system including software versions, memory,
-          storage, and database configuration."""
-       def __init__(self):
-               self._memory: int = psutil.virtual_memory().total
-               self.friendly_memory: str = self._friendly_memory_string(self._memory)
-               # psutil.cpu_count(logical=False) returns the number of CPU cores.
-               # For the number of logical cores (hyperthreaded), call psutil.cpu_count() or os.cpu_count()
-               self.num_cpus: int = psutil.cpu_count(logical=False)
-               self.os_info: str = self._os_name_info()
-
-### These are commented out because they have not been tested.
-#              self.nominatim_ver: str = '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION)
-#      self._pg_version = conn.server_version_tuple()
-#      self._postgis_version = conn.postgis_version_tuple()
-#              self.postgresql_ver: str = self._convert_version(self._pg_version)
-#              self.postgis_ver: str = self._convert_version(self._postgis_version)
-
-               self.nominatim_ver: str = ""
-               self.postgresql_ver: str = ""
-               self.postgresql_config: str = ""
-               self.postgis_ver: str = ""
-
-               # the below commands require calling the shell to gather information
-               self.disk_free: str = self._run_command(["df", "-h"])
-               self.lsblk: str = self._run_command("lsblk")
-               # psutil.disk_partitions() <- this function is similar to the above, but it is cross platform
-
-               # Note: `systemd-detect-virt` command only works on Linux, on other OSes
-               # should give a message: "Unknown (unable to find the 'systemd-detect-virt' command)"
-               self.container_vm_env: str = self._run_command("systemd-detect-virt")
-
-       def _convert_version(self, ver_tup: tuple) -> str:
-               """converts tuple version (ver_tup) to a string representation"""
-               return ".".join(map(str,ver_tup))
-
-       def _friendly_memory_string(self, mem: int) -> str:
-               """Create a user friendly string for the amount of memory specified as mem"""
-               mem_magnitude = ('bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
-               mag = 0
-               # determine order of magnitude
-               while mem > 1000:
-                       mem /= 1000
-                       mag += 1
-               
-               return f"{mem:.1f} {mem_magnitude[mag]}"
-
-
-       def _run_command(self, cmd: Union[str, list]) -> str:
-               """Runs a command using the shell and returns the output from stdout"""
-               try:
-                       if sys.version_info < (3, 7):
-                               cap_out = subprocess.run(cmd, stdout=subprocess.PIPE)
-                       else:
-                               cap_out = subprocess.run(cmd, capture_output=True)
-                       return cap_out.stdout.decode("utf-8")
-               except FileNotFoundError:
-                               # non-Linux system should end up here
-                               return f"Unknown (unable to find the '{cmd}' command)"
-
-
-       def _os_name_info(self) -> str:
-               """Obtain Operating System Name (and possibly the version)"""
-
-               os_info = None
-               # man page os-release(5) details meaning of the fields
-               if Path("/etc/os-release").is_file():
-                       os_info = self._from_file_find_line_portion("/etc/os-release", "PRETTY_NAME", "=")
-               # alternative location 
-               elif Path("/usr/lib/os-release").is_file():
-                       os_info = self._from_file_find_line_portion("/usr/lib/os-release", "PRETTY_NAME", "=")
-
-               # fallback on Python's os name
-               if(os_info is None or os_info == ""):
-                       os_info = os.name
-
-               # if the above is insufficient, take a look at neofetch's approach to OS detection              
-               return os_info
-
-
-       # Note: Intended to be used on informational files like /proc
-       def _from_file_find_line_portion(self, filename: str, start: str, sep: str,
-                                                                        fieldnum: int = 1) -> Optional[str]:
-               """open filename, finds the line starting with the 'start' string.
-                  Splits the line using separator and returns a "fieldnum" from the split."""
-               with open(filename) as fh:
-                       for line in fh:
-                               if line.startswith(start):
-                                       result = line.split(sep)[fieldnum].strip()
-                                       return result
-
-       def report(self, out = sys.stdout, err = sys.stderr) -> None:
-               """Generates the Markdown report. 
-               
-               Optionally pass out or err parameters to redirect the output of stdout
-                and stderr to other file objects."""
-               
-               # NOTE: This should be a report format.  Any conversions or lookup has be
-               #  done, do that action in the __init__() or another function. 
-               message = """
-Use this information in your issue report at https://github.com/osm-search/Nominatim/issues
-Copy and paste or redirect the output of the file:
-    $ ./collect_os_info.py > report.md
-"""
-               report = f"""
-**Software Environment:**
-- Python version: {sys.version}
-- Nominatim version: {self.nominatim_ver} 
-- PostgreSQL version: {self.postgresql_ver} 
-- PostGIS version: {self.postgis_ver}
-- OS: {self.os_info}
-
-
-**Hardware Configuration:**
-- RAM: {self.friendly_memory}
-- number of CPUs: {self.num_cpus}
-- bare metal/AWS/other cloud service (per systemd-detect-virt(1)): {self.container_vm_env} 
-- type and size of disks:
-**`df -h` - df - report file system disk space usage: **
-```
-{self.disk_free}
-```
-
-**lsblk - list block devices: **
-```
-{self.lsblk}
-```
-
-
-**Postgresql Configuration:**
-```
-{self.postgresql_config}
-```
-**Notes**
-Please add any notes about anything above anything above that is incorrect.
-       """
-
-               print(message, file = err)
-               print(report, file = out)
-
-
-if __name__ == "__main__":
-       sys_info = ReportSystemInformation()
-       sys_info.report()
\ No newline at end of file
diff --git a/utils/collect_os_info.sh b/utils/collect_os_info.sh
deleted file mode 100755 (executable)
index 15ea6c3..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env bash
-
-Description="The purpose of this script is to collect system information for bug reports.\n
-Submit issues to https://github.com/osm-search/Nominatim/issues"
-
-
-####### Gather the Information ##################################################
-# Separate the information gathering from the report generation.  Dividing these
-# makes it easier to make trivial changes by not have to learn the other portion
-# of this script.
-
-# Nominatim version
-# NOTE: Getting this version will NOT work if the script is run from a folder
-# other than Nominatim/utils.  It calls python3 to import version.py locally and
-# prints it in the version format.
-NominatimVersion=`cd ../nominatim/ && python3 -c "import version; print('{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version.NOMINATIM_VERSION))"`
-
-# PostgreSQL version
-PostgreSQLVersion=`postgres --version`
-if [ "$?" -ne "0" ]
-then
-  PostgreSQLVersion="Not installed"
-fi
-
-# - PostGIS version:
-# The command for this should look something like this:
-#       psql -U nominatim -d mydatabase -c 'SELECT PostGIS_full_version();'
-# ASSUME the username is nominatim
-# This needs to be ran under the account with the appropriate permissions.
-# This has been left blank.
-PostGISVersion=
-
-# There are different ways to getting the Linux OS information.
-# https://www.cyberciti.biz/faq/how-to-check-os-version-in-linux-command-line/
-# /etc/os-release has a number of representations of the OS
-# PRETTY_NAME is pity.
-OperatingSystem=`grep '^PRETTY_NAME' /etc/os-release | cut -d'=' -f2`
-
-RAM=`grep ^MemTotal /proc/meminfo | cut -d':' -f2`
-
-# In /proc/cpuinfo: siblings seems to refer to total cores, including hyperthreaded cores.
-# The hyperthreaded cores could be included if that is needed.
-NumCPUs=`grep '^cpu cores' /proc/cpuinfo | head -1 | cut -d':' -f2`
-
-
-# - type and size of disks:
-# could use `sudo fdisk -l` or `mount` to print this, but snaps have made this
-# worse than useless with loop devices on Ubuntu.  
-# `df -h` - show the free space on drives
-# `lsblk` - this tell you what the server has not necessarily this machine.  So in a container environment
-#  (like docker) this wouldn't be the correct report.
-# This guide shows ways to get various storage device information: https://www.cyberciti.biz/faq/find-hard-disk-hardware-specs-on-linux/
-
-# - bare metal/AWS/other cloud service:
-# Unsure of how to detect this, but it might be useful for reporting disk storage.
-# One options would be to prompt the user something like this:
-# Enter system configuration (1) bare metal (2) AWS (3) Other Cloud (4) Docker (5) Other: _
-
-# ------ What do these commands do? -------------------------------------------
-# "cut -d':' -f2"      command takes the line and splits it at the colon (:)
-#                      and returns the portion in the second (2nd) "field"
-#
-# "head -1"            returns the first line that matches
-#
-
-####### Print the Markdown Report ######################################################
-# 1>&2 redirects echo to print to stderr instead of stdout
-
-echo 1>&2
-echo -e $Description 1>&2
-echo Copy and paste or redirect the output of the file:  1>&2
-echo "     \$ ./collect_os_info.sh > report.md" 1>&2
-echo 1>&2
-
-
-echo "**Software Environment (please complete the following information):**"
-echo - Nominatim version: $NominatimVersion
-echo - PostgreSQL version: $PostgreSQLVersion 
-echo - PostGIS version: $PostGISVersion 
-echo - OS: $OperatingSystem
-echo
-echo
-
-
-echo "**Hardware Configuration (please correct the following information):**"
-echo - RAM: $RAM
-echo - number of CPUs: $NumCPUs
-echo - type and size of disks:
-echo - bare metal/AWS/other cloud service: 
-echo
-echo
-echo **Postgresql Configuration:**
-echo
-