|
10 | 10 | import csv
|
11 | 11 |
|
12 | 12 |
|
def total_cpu_percent_with_children(
    pid: int, sample_window: float = 0.1
) -> float:
    """Return total CPU usage (%) for process `pid` and its children.

    Fresh ``psutil.Process`` objects are created on every call, so each one
    is primed with a first ``cpu_percent`` call, then read again after
    `sample_window` seconds.

    Args:
        pid: Process ID to monitor.
        sample_window: Seconds to sleep between the priming call and the
            actual reading. Defaults to 0.1.

    Returns:
        Sum of the CPU percentages of the process and all of its recursive
        children. Returns 0.0 if `pid` does not exist (or exits while its
        children are being enumerated). Processes that vanish or cannot be
        accessed during sampling are skipped.
    """
    try:
        parent = psutil.Process(pid)
        # Enumerating children is kept inside the try: the parent may exit
        # between the lookup above and this call.
        processes = [parent] + parent.children(recursive=True)
    except psutil.NoSuchProcess:
        return 0.0

    # Prime the CPU measurement for every process in the tree.
    for proc in processes:
        try:
            proc.cpu_percent(interval=None)
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            continue  # Ignore inaccessible processes

    time.sleep(sample_window)  # Allow measurements to update

    # Read the usage accumulated since the priming call.
    total_cpu = 0.0
    for proc in processes:
        try:
            total_cpu += proc.cpu_percent(interval=0.0)
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            continue  # Ignore processes that disappeared
    return total_cpu
| 45 | + |
| 46 | + |
def total_memory_with_children(pid: int) -> float:
    """Return the combined resident memory (MB) of a process tree.

    Args:
        pid: Parent process ID.

    Returns:
        Summed RSS of the parent and all of its recursive children, in MB.
        Returns 0.0 when the parent process cannot be found.
    """
    bytes_per_mb = 1024 * 1024

    try:
        root = psutil.Process(pid)
        family = [root, *root.children(recursive=True)]
    except psutil.NoSuchProcess:
        return 0.0  # Process not found

    rss_bytes = 0
    for member in family:
        try:
            # RSS is the physical memory (RAM) held by the process.
            rss_bytes += member.memory_info().rss
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            pass  # Skip processes we can't inspect
    return rss_bytes / bytes_per_mb
| 71 | + |
| 72 | + |
def total_gpu_usage_with_children(pid: int) -> tuple:
    """Return GPU utilization (%) and VRAM usage (MB) for `pid` and its children.

    For every NVML device, the compute processes running on it are matched
    against the process tree rooted at `pid`. VRAM is summed per matching
    process. The device-wide utilization is added once per device that hosts
    at least one matching process, so several tracked processes sharing a GPU
    do not count that GPU's utilization multiple times.

    Args:
        pid: Process ID to monitor.

    Returns:
        Tuple ``(gpu_usage, vram_usage)``: summed utilization (%) of the
        devices used by the process tree, and summed VRAM (MB) of the
        matching processes. Both are 0 when nothing matches. If the process
        or NVML query fails part-way, the totals gathered so far are
        returned.
    """
    total_gpu_usage = 0
    total_vram_usage = 0

    try:
        parent = psutil.Process(pid)
        # Build the pid set once; membership is tested for every GPU process.
        tracked_pids = {parent.pid}
        tracked_pids.update(
            child.pid for child in parent.children(recursive=True)
        )

        for i in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            device_in_use = False
            for proc_info in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
                if proc_info.pid not in tracked_pids:
                    continue
                device_in_use = True
                # NOTE(review): pynvml may report usedGpuMemory as None when
                # the driver cannot provide it; skip instead of failing.
                if proc_info.usedGpuMemory is not None:
                    total_vram_usage += proc_info.usedGpuMemory / (1024 * 1024)
            if device_in_use:
                # Utilization is a per-device figure, so add it once per device.
                total_gpu_usage += pynvml.nvmlDeviceGetUtilizationRates(handle).gpu
    except (psutil.Error, pynvml.NVMLError):
        # Best effort: the process vanished or NVML is unavailable
        # (e.g., no GPU); report whatever was collected.
        pass
    return total_gpu_usage, total_vram_usage
| 102 | + |
| 103 | + |
def find_pid_by_name(name: str) -> "int | None":
    """Find the PID of the first process whose command line contains `name`.

    A process matches when `name` is exactly one of its command-line
    arguments. The calling process itself is never returned, because its own
    argv may contain `name` (for example via a ``--name`` option).

    Args:
        name: Command-line argument to look for (e.g. a script name).

    Returns:
        PID of the first matching process, or None if no process matches.
        The outcome is also echoed to the terminal via click.
    """
    import os  # Local import: only needed to exclude the current process.

    own_pid = os.getpid()
    for proc in psutil.process_iter(["pid", "name", "cmdline"]):
        if proc.info["pid"] == own_pid:
            continue
        # psutil leaves an attribute as None when it could not be read
        # (e.g. access denied); treat that as an empty command line.
        cmdline = proc.info["cmdline"] or []
        if name in cmdline:
            found_pid = proc.info["pid"]
            click.echo(
                click.style(f"Found process '{name}' with PID {found_pid}.", fg="green")
            )
            return found_pid
    click.echo(click.style(f"Error: Process with name '{name}' not found.", fg="red"))
    return None
| 122 | + |
| 123 | + |
13 | 124 | @click.command()
|
14 |
| -@click.option("--pid", type=int, required=True, help="Process ID of the Python script") |
15 |
| -@click.option("--interval", type=int, default=2, help="Monitoring interval (seconds)") |
16 | 125 | @click.option(
|
17 |
| - "--duration", type=int, default=30, help="Total monitoring duration (seconds)" |
| 126 | + "--pid", type=str, default="auto", help='Process ID or "auto" to find by name' |
| 127 | +) |
| 128 | +@click.option( |
| 129 | + "--name", type=str, default="app.py", help="Process name (default: app.py)" |
18 | 130 | )
|
| 131 | +@click.option("--interval", type=int, default=2, help="Monitoring interval (seconds)") |
19 | 132 | @click.option(
|
20 |
| - "--output", |
21 |
| - type=str, |
22 |
| - default=None, |
23 |
| - help="File to save system resource logs (optional)", |
| 133 | + "--duration", type=int, default=30, help="Total monitoring duration (seconds)" |
24 | 134 | )
|
| 135 | +@click.option("--output", type=str, default=None, help="File to save logs (optional)") |
25 | 136 | @click.option("--spy", is_flag=True, help="Enable py-spy profiling")
|
26 | 137 | @click.option(
|
27 | 138 | "--spy-output", type=str, default="pyspy_profile.svg", help="Py-Spy output file"
|
28 | 139 | )
|
29 | 140 | def monitor_resources(
|
30 |
| - pid: int, interval: int, duration: int, output: str, spy: bool, spy_output: str |
| 141 | + pid: int, |
| 142 | + name: str, |
| 143 | + interval: int, |
| 144 | + duration: int, |
| 145 | + output: str, |
| 146 | + spy: bool, |
| 147 | + spy_output: str, |
31 | 148 | ):
|
32 | 149 | """Monitor system resources for a given PID and optionally create a py-spy profiler
|
33 | 150 | report.
|
34 | 151 |
|
35 | 152 | Args:
|
36 | 153 | pid (int): Process ID of the Python script.
|
| 154 | + name (str): Name of the Python script. |
37 | 155 | interval (int): Monitoring interval in seconds.
|
38 | 156 | duration (int): Total monitoring duration in seconds.
|
39 | 157 | output (str): File to save logs (optional).
|
40 | 158 | spy (bool): Enable py-spy profiling.
|
41 | 159 | spy_output (str): Py-Spy output file.
|
42 | 160 | """
|
| 161 | + if pid == "auto": |
| 162 | + pid = find_pid_by_name(name) |
| 163 | + if pid is None: |
| 164 | + return |
| 165 | + else: |
| 166 | + pid = int(pid) |
| 167 | + |
43 | 168 | if not psutil.pid_exists(pid):
|
44 | 169 | click.echo(click.style(f"Error: Process with PID {pid} not found.", fg="red"))
|
45 | 170 | return
|
46 | 171 |
|
47 | 172 | click.echo(
|
48 |
| - click.style( |
49 |
| - f"Monitoring system resources for PID {pid} for {duration} seconds...", |
50 |
| - fg="green", |
51 |
| - ) |
| 173 | + click.style(f"Monitoring PID {pid} for {duration} seconds...", fg="green") |
52 | 174 | )
|
53 | 175 |
|
54 | 176 | def run_py_spy():
|
@@ -76,72 +198,52 @@ def run_py_spy():
|
76 | 198 | logs = []
|
77 | 199 | cpu_usages, ram_usages, gpu_usages, vram_usages = [], [], [], []
|
78 | 200 | while time.time() < end_time:
|
| 201 | + start_time = time.time() |
79 | 202 | try:
|
80 |
| - # General system usage. |
81 |
| - process = psutil.Process(pid) |
82 |
| - cpu_usage = process.cpu_percent(interval=interval) |
83 |
| - ram_usage = process.memory_info().rss / (1024 * 1024) # MB |
84 |
| - |
85 |
| - # GPU usage. |
86 |
| - process_gpu_usage = 0 |
87 |
| - process_vram_usage = 0 |
88 |
| - device_count = pynvml.nvmlDeviceGetCount() |
89 |
| - for i in range(device_count): |
90 |
| - handle = pynvml.nvmlDeviceGetHandleByIndex(i) |
91 |
| - processes = pynvml.nvmlDeviceGetComputeRunningProcesses(handle) |
92 |
| - for proc_info in processes: |
93 |
| - if proc_info.pid == pid: |
94 |
| - process_gpu_usage = pynvml.nvmlDeviceGetUtilizationRates( |
95 |
| - handle |
96 |
| - ).gpu |
97 |
| - process_vram_usage = proc_info.usedGpuMemory / ( |
98 |
| - 1024 * 1024 |
99 |
| - ) # MB |
100 |
| - break |
101 |
| - |
102 |
| - # Collect and log resource usage. |
| 203 | + cpu_usage = total_cpu_percent_with_children(pid) |
| 204 | + memory_usage = total_memory_with_children(pid) |
| 205 | + gpu_usage, vram_usage = total_gpu_usage_with_children(pid) |
| 206 | + |
103 | 207 | log_entry = {
|
104 | 208 | "CPU (%)": cpu_usage,
|
105 |
| - "RAM (MB)": ram_usage, |
106 |
| - "GPU (%)": process_gpu_usage, |
107 |
| - "VRAM (MB)": process_vram_usage, |
| 209 | + "RAM (MB)": memory_usage, |
| 210 | + "GPU (%)": gpu_usage, |
| 211 | + "VRAM (MB)": vram_usage, |
108 | 212 | }
|
109 | 213 | click.echo(
|
110 |
| - f"CPU: {cpu_usage:.2f}%, RAM: {ram_usage:.2f}MB, GPU: {process_gpu_usage:.2f}%, VRAM: {process_vram_usage:.2f}MB" |
| 214 | + f"CPU: {cpu_usage:.2f}%, RAM: {memory_usage:.2f}MB, GPU: {gpu_usage:.2f}%, VRAM: {vram_usage:.2f}MB" |
111 | 215 | )
|
112 | 216 | logs.append(log_entry)
|
113 | 217 | cpu_usages.append(cpu_usage)
|
114 |
| - ram_usages.append(ram_usage) |
115 |
| - gpu_usages.append(process_gpu_usage) |
116 |
| - vram_usages.append(process_vram_usage) |
| 218 | + ram_usages.append(memory_usage) |
| 219 | + gpu_usages.append(gpu_usage) |
| 220 | + vram_usages.append(vram_usage) |
| 221 | + |
| 222 | + # Adjust sleep time to maintain exact interval |
| 223 | + elapsed_time = time.time() - start_time |
| 224 | + sleep_time = max(0, interval - elapsed_time) |
| 225 | + time.sleep(sleep_time) |
117 | 226 | except psutil.NoSuchProcess:
|
118 |
| - click.echo(click.style("Error: Process terminated!")) |
| 227 | + click.echo(click.style("Error: Process terminated!", fg="red")) |
119 | 228 | break
|
120 | 229 |
|
121 | 230 | pynvml.nvmlShutdown()
|
122 | 231 |
|
123 |
| - # Calculate and log average resource usage. |
124 |
| - avg_cpu_usage = sum(cpu_usages) / len(cpu_usages) if cpu_usages else 0 |
125 |
| - avg_ram_usage = sum(ram_usages) / len(ram_usages) if ram_usages else 0 |
126 |
| - avg_gpu_usage = sum(gpu_usages) / len(gpu_usages) if gpu_usages else 0 |
127 |
| - avg_vram_usage = sum(vram_usages) / len(vram_usages) if vram_usages else 0 |
128 |
| - avg_log_entry = { |
129 |
| - "CPU (%)": avg_cpu_usage, |
130 |
| - "RAM (MB)": avg_ram_usage, |
131 |
| - "GPU (%)": avg_gpu_usage, |
132 |
| - "VRAM (MB)": avg_vram_usage, |
133 |
| - } |
| 232 | + # Calculate and log averages |
| 233 | + avg_cpu = sum(cpu_usages) / len(cpu_usages) if cpu_usages else 0 |
| 234 | + avg_ram = sum(ram_usages) / len(ram_usages) if ram_usages else 0 |
| 235 | + avg_gpu = sum(gpu_usages) / len(gpu_usages) if gpu_usages else 0 |
| 236 | + avg_vram = sum(vram_usages) / len(vram_usages) if vram_usages else 0 |
| 237 | + |
134 | 238 | click.echo(
|
135 |
| - f"AVERAGE - CPU: {avg_cpu_usage:.2f}%, RAM: {avg_ram_usage:.2f}MB, GPU: {avg_gpu_usage:.2f}%, VRAM: {avg_vram_usage:.2f}MB" |
| 239 | + f"AVERAGE - CPU: {avg_cpu:.2f}%, RAM: {avg_ram:.2f}MB, GPU: {avg_gpu:.2f}%, VRAM: {avg_vram:.2f}MB" |
136 | 240 | )
|
137 |
| - logs.append(avg_log_entry) |
138 | 241 |
|
139 | 242 | # Save logs if output file is provided.
|
140 | 243 | if output:
|
141 | 244 | with open(output, "w", newline="") as csvfile:
|
142 | 245 | fieldnames = ["CPU (%)", "RAM (MB)", "GPU (%)", "VRAM (MB)"]
|
143 | 246 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
144 |
| - |
145 | 247 | writer.writeheader()
|
146 | 248 | writer.writerows(logs)
|
147 | 249 | click.echo(click.style(f"Logs saved to {output}", fg="green"))
|
|
0 commit comments