Skip to content

Commit c5fd843

Browse files
authored
Merge pull request #1283 from OCR-D/mets-server-kill-zombies
add endpoint DELETE /kill_mets_server_zombies to kill -SIGTERM METS servers with ctime > 60mins ago
2 parents 3882e7a + a8bfbe4 commit c5fd843

File tree

3 files changed

+53
-6
lines changed

3 files changed

+53
-6
lines changed

src/ocrd/mets_server.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
"""
22
# METS server functionality
33
"""
4+
import os
45
import re
56
from os import _exit, chmod
7+
import signal
68
from typing import Dict, Optional, Union, List, Tuple
79
from time import sleep
810
from pathlib import Path
@@ -428,8 +430,12 @@ def create_process(mets_server_url: str, ws_dir_path: str, log_file: str) -> int
428430

429431
@staticmethod
430432
def kill_process(mets_server_pid: int):
431-
subprocess_run(args=["kill", "-s", "SIGINT", f"{mets_server_pid}"], shell=False, universal_newlines=True)
432-
return
433+
os.kill(mets_server_pid, signal.SIGINT)
434+
sleep(3)
435+
try:
436+
os.kill(mets_server_pid, signal.SIGKILL)
437+
except ProcessLookupError as e:
438+
pass
433439

434440
def shutdown(self):
435441
if self.is_uds:

src/ocrd_network/processing_server.py

+13
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
get_workflow_content,
4949
get_from_database_workspace,
5050
get_from_database_workflow_job,
51+
kill_mets_server_zombies,
5152
parse_workflow_tasks,
5253
raise_http_exception,
5354
request_processor_server_tool_json,
@@ -200,6 +201,14 @@ def add_api_routes_others(self):
200201
tags=[ServerApiTags.WORKSPACE],
201202
summary="Forward a TCP request to UDS mets server"
202203
)
204+
others_router.add_api_route(
205+
path="/kill_mets_server_zombies",
206+
endpoint=self.kill_mets_server_zombies,
207+
methods=["DELETE"],
208+
tags=[ServerApiTags.WORKFLOW, ServerApiTags.PROCESSING],
209+
status_code=status.HTTP_200_OK,
210+
summary="!! Workaround Do Not Use Unless You Have A Reason !! Kill all METS servers on this machine that have been created more than 60 minutes ago."
211+
)
203212
self.include_router(others_router)
204213

205214
def add_api_routes_processing(self):
@@ -817,6 +826,10 @@ async def get_workflow_info(self, workflow_job_id) -> Dict:
817826
response = self._produce_workflow_status_response(processing_jobs=jobs)
818827
return response
819828

829+
async def kill_mets_server_zombies(self) -> List[int]:
    """
    Endpoint handler: delegate to the module-level `kill_mets_server_zombies`
    helper with a 60-minute threshold and return whatever it returns.
    """
    return kill_mets_server_zombies(minutes_ago=60)
832+
820833
async def get_workflow_info_simple(self, workflow_job_id) -> Dict[str, JobState]:
821834
"""
822835
Simplified version of the `get_workflow_info` that returns a single state for the entire workflow.

src/ocrd_network/server_utils.py

+32-4
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
1+
import os
2+
import re
3+
import signal
4+
from pathlib import Path
5+
from json import dumps, loads
6+
from urllib.parse import urljoin
7+
from typing import Dict, List, Union
8+
from time import time
9+
110
from fastapi import HTTPException, status, UploadFile
211
from fastapi.responses import FileResponse
312
from httpx import AsyncClient, Timeout
4-
from json import dumps, loads
513
from logging import Logger
6-
from pathlib import Path
714
from requests import get as requests_get
8-
from typing import Dict, List, Union
9-
from urllib.parse import urljoin
15+
from requests_unixsocket import sys
1016

1117
from ocrd.resolver import Resolver
1218
from ocrd.task_sequence import ProcessorTask
@@ -241,3 +247,25 @@ def validate_first_task_input_file_groups_existence(logger: Logger, mets_path: s
241247
if group not in available_groups:
242248
message = f"Input file group '{group}' of the first processor not found: {input_file_grps}"
243249
raise_http_exception(logger, status.HTTP_422_UNPROCESSABLE_ENTITY, message)
250+
251+
252+
def kill_mets_server_zombies(minutes_ago=60) -> List[int]:
    """
    Send SIGTERM to METS server processes whose /proc entry is old enough.

    Scans the numeric entries of ``/proc``, keeps those whose directory ctime
    lies at least ``minutes_ago`` minutes in the past and whose command line
    matches ``ocrd workspace -U ... server start``, and signals them oldest
    first.

    Args:
        minutes_ago: Minimum age, in whole minutes, for a process to be signalled.

    Returns:
        The PIDs (as integers) that SIGTERM was successfully sent to.
    """
    # Local import: do not rely on `sys` being re-exported by a third-party module.
    import sys

    now = time()
    # The trailing space is intentional: /proc/<pid>/cmdline is NUL-terminated and
    # every NUL is turned into a space below, so a matching command line ends in ' '.
    cmdline_pat = re.compile(r'.*ocrd workspace -U.*server start $')
    candidates = []
    for procdir in Path('/proc').glob('*'):
        # Only numeric entries are processes; skips 'self', 'thread-self', 'sys', ...
        if not procdir.name.isdigit():
            continue
        try:
            ctime = procdir.stat().st_ctime
            ctime_ago = int((now - ctime) / 60)
            if ctime_ago < minutes_ago:
                continue
            cmdline = procdir.joinpath('cmdline').read_text(errors='replace').replace('\x00', ' ')
        except OSError:
            # The process vanished (or became unreadable) while we were scanning.
            continue
        if cmdline_pat.match(cmdline):
            candidates.append((ctime, int(procdir.name), ctime_ago, cmdline))

    ret = []
    # Oldest first, matching the ctime ordering of the scan.
    for _, pid, ctime_ago, cmdline in sorted(candidates):
        print(f'METS Server with PID {pid} was created {ctime_ago} minutes ago, more than {minutes_ago}, so killing (cmdline="{cmdline}")', file=sys.stderr)
        try:
            os.kill(pid, signal.SIGTERM)
        except OSError as err:
            # Already gone or not ours to signal: report it, but keep sweeping.
            print(f'Could not send SIGTERM to METS Server with PID {pid}: {err}', file=sys.stderr)
            continue
        ret.append(pid)
    return ret

0 commit comments

Comments
 (0)