Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating to ray2 #1982

Merged
merged 14 commits into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion dependencies.xml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Note all install methods after "main" take
<statsmodels>0.13</statsmodels>
<cloudpickle>2.2</cloudpickle>
<tensorflow>2.9</tensorflow>
<!-- conda is really slow on windows if the version is not specified.-->
<python skip_check='True' os='windows'>3.7</python>
<python skip_check='True' os='mac,linux'>3</python>
<hdf5 skip_check='True'/>
Expand All @@ -67,7 +68,8 @@ Note all install methods after "main" take
<nomkl os='linux' skip_check='True'/>
<numexpr os='linux'/>
<cmake skip_check='True' optional='True'/>
<ray source="pip" pip_extra="[default]">1.13</ray>
<ray source="pip" pip_extra="[default]" os='mac,linux'>2.1</ray>
<ray source="pip" pip_extra="[default]" os='windows'>1.13</ray>
<!-- redis is needed by ray, but on windows, this seems to need to be explicitly stated -->
<redis source="pip" os='windows'/>
<imageio>2.22</imageio>
Expand Down
3 changes: 2 additions & 1 deletion ravenframework/CustomDrivers/DriverUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ def setupFramework():
@ In, None
@ Out, None
"""
frameworkDir = findFramework()
#Get the directory above the ravenframework directory
frameworkDir = os.path.dirname(findFramework())
if frameworkDir not in sys.path:
sys.path.append(frameworkDir)

Expand Down
12 changes: 7 additions & 5 deletions ravenframework/JobHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,11 @@ def __initializeRay(self):
if 'UPDATE_PYTHONPATH' in self.runInfoDict:
sys.path.extend([p.strip() for p in self.runInfoDict['UPDATE_PYTHONPATH'].split(":")])

if _rayAvail:
# merge the current sys.path into the PYTHONPATH environment variable
olderPath = os.environ["PYTHONPATH"].split(os.pathsep) if "PYTHONPATH" in os.environ else []
os.environ["PYTHONPATH"] = os.pathsep.join(set(olderPath+sys.path))

# is ray instantiated outside?
self.rayInstanciatedOutside = 'headNode' in self.runInfoDict
if len(self.runInfoDict['Nodes']) > 0 or self.rayInstanciatedOutside:
Expand Down Expand Up @@ -206,11 +211,6 @@ def __initializeRay(self):
self.raiseADebug("Head host IP :", address)
## Get servers and run ray remote listener
servers = self.runInfoDict['remoteNodes'] if self.rayInstanciatedOutside else self.__runRemoteListeningSockets(address, localHostName)
if self.rayInstanciatedOutside:
# update the python path and working dir
# update head node paths
olderPath = os.environ["PYTHONPATH"].split(os.pathsep) if "PYTHONPATH" in os.environ else []
os.environ["PYTHONPATH"] = os.pathsep.join(set(olderPath+sys.path))
# add names in runInfo
self.runInfoDict['remoteNodes'] = servers
## initialize ray server with nProcs
Expand All @@ -220,6 +220,7 @@ def __initializeRay(self):
self.raiseADebug("Executing RAY in the cluster but with a single node configuration")
self.rayServer = ray.init(num_cpus=nProcsHead,log_to_driver=False,include_dashboard=db)
else:
self.raiseADebug("Initializing", "ray" if _rayAvail else "pp","locally with num_cpus: ", self.runInfoDict['totalNumCoresUsed'])
self.rayServer = ray.init(num_cpus=int(self.runInfoDict['totalNumCoresUsed']),include_dashboard=db) if _rayAvail else \
pp.Server(ncpus=int(self.runInfoDict['totalNumCoresUsed']))
if _rayAvail:
Expand All @@ -228,6 +229,7 @@ def __initializeRay(self):
self.raiseADebug("Object store address: ", self.rayServer.address_info['object_store_address'])
self.raiseADebug("Raylet socket name : ", self.rayServer.address_info['raylet_socket_name'])
self.raiseADebug("Session directory : ", self.rayServer.address_info['session_dir'])
self.raiseADebug("GCS Address : ", self.rayServer.address_info['gcs_address'])
if servers:
self.raiseADebug("# of remote servers : ", str(len(servers)))
self.raiseADebug("Remote servers : ", " , ".join(servers))
Expand Down