-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathCodeEnv.py
162 lines (133 loc) · 6.32 KB
/
CodeEnv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import gym
import numpy as np
import scipy.io as sio
import os
from pathlib import Path
from utils import *
# default for maxIter : env.spec.max_episode_steps
class CodeEnv(gym.Env):
    """Bit-flipping decoding environment for a binary linear code.

    Every episode (see ``reset``) transmits the all-zero codeword, mapped to
    +1 per bit and perturbed by Gaussian noise of variance ``sigma2``.  The
    hard decision on the noisy word is the initial codeword estimate.  An
    action is a bit index in ``[0, n)``; taking it flips that bit of the
    estimate and updates the syndrome.  The episode ends when the syndrome
    is all-zero or after ``maxIter`` moves.

    Observation:
        * ``WBF`` false: the binary syndrome as a tuple of length ``m``.
        * ``WBF`` true: the soft syndrome, i.e. the +/-1 mapped syndrome
          scaled per check by the smallest ``|y|`` among the bits taking
          part in that check.
    """

    def __init__(self, env_config):  # RLLIB: custom env classes must take a single config parameter
        """
        Args (keys of ``env_config``):
            code: string that identifies the parity-check matrix, e.g., "RM_2_5_std"
            maxIter: maximum number of decoding iterations
            EbNo_dB: training signal-to-noise ratio (SNR) in dB
            (optional):
            WBF: weighted bit-flipping (default: False)
            Lmax: maximum LLR value (default: 2.0)
            asort: automorphism sort (default: False), strategy depends on the code (either RM or BCH)
            asortk: second argument handed to ``find_cyclic_auto`` when
                asort is enabled for BCH codes (default: -2)
            path_to_Hmat: path to .mat file for H (default: "~/Hmat")
            Hrank: rank of the parity-check matrix (default: will be computed which may take some time)
        """
        self.code = env_config["code"]

        if "path_to_Hmat" in env_config:
            path_to_Hmat = Path(env_config["path_to_Hmat"])
        else:
            path_to_Hmat = Path.home() / "Hmat"
            print("CodeEnv: path_to_Hmat not specified. Looking in "+str(path_to_Hmat))

        # optional parameters
        self.WBF = env_config.get("WBF", False)
        self.Lmax = env_config.get("Lmax", 2.0)
        self.asort = env_config.get("asort", False)
        self.asortk = env_config.get("asortk", -2)

        # environment setup: the .mat file must hold the matrix under key 'H'
        H = sio.loadmat(str(path_to_Hmat / self.code))['H']
        self.H = np.int64(H)
        self.m = self.H.shape[0]  # number of parity checks
        self.n = self.H.shape[1]  # code length

        # Only run the rank computation when the caller did not supply
        # Hrank; dict.get(key, f()) would evaluate f() unconditionally.
        if "Hrank" in env_config:
            self.Hrank = env_config["Hrank"]  # n-k
        else:
            print("determining rank(H) ...")
            self.Hrank = gfrank(self.H)  # n-k
        print("rank(H) = {}".format(self.Hrank))

        self.k = self.n - self.Hrank  # code dimension
        self.R = self.k/self.n  # code rate

        self.action_space = gym.spaces.Discrete(self.n)

        self.absL_avg = np.zeros(shape=self.n)  # running average of abs(LLRs)
        self.path_penality = np.zeros(shape=self.n)
        self.totCw = 0  # number of total codewords = number of resets

        self.maxIter = env_config["maxIter"]
        self.set_EbNo_dB(env_config["EbNo_dB"])

        if self.WBF:
            # NOTE(review): the soft syndrome built in reset() is +/- min|y|
            # and is not clipped, so observations can fall outside
            # [-Lmax, Lmax] -- confirm whether that is intended.
            self.observation_space = gym.spaces.Box(
                low=-self.Lmax, high=self.Lmax, shape=[self.m], dtype=np.float32)
            # for each check node, the indices of the bits it involves
            self.CN_neighbors = [row.nonzero() for row in self.H]
        else:
            # binary tuple/vector
            self.observation_space = gym.spaces.Tuple((gym.spaces.Discrete(2),)*self.m)

        self.reset()

    def set_EbNo_dB(self, x):
        """Set the SNR (Eb/N0 in dB) and derive the noise variance from it.

        ``sigma2 = 1 / (2 * R * 10**(EbNo_dB / 10))`` with ``R`` the code rate.
        """
        self.EbNo_dB = x
        self.sigma2 = 1/(2*self.R*10**(self.EbNo_dB/10))

    def reset(self):  # all-zero codeword transmission
        """Draw a new noisy received word and return the initial observation.

        Returns:
            The syndrome tuple (``WBF`` false) or the soft-syndrome array
            (``WBF`` true) of the hard-decision estimate.

        Raises:
            ValueError: if ``asort`` is enabled and the code name contains
                neither "RM" nor "BCH".
        """
        # np.int was removed from NumPy; use an explicit integer dtype.
        c = np.zeros(self.n, dtype=np.int64)
        y = (-2*c+1) + np.random.normal(scale=np.sqrt(self.sigma2), size=self.n)

        if self.asort:
            if "RM" in self.code:
                sind = find_rm_auto(y)
            elif "BCH" in self.code:
                sind = find_cyclic_auto(y, self.asortk)
            else:
                raise ValueError("code string must contain either RM or BCH if asort==True")
            y = y[sind]

        llr = 2*y/self.sigma2  # LLRs
        z = (y < 0).astype(np.int64)  # hard decision observation

        # if asort == False, this should converge to uniform
        self.absL_avg = (abs(llr) + self.totCw*self.absL_avg)/(self.totCw + 1)

        self.chat = z  # current codeword estimate (including bit flips)
        self.error_locations = np.flatnonzero(z == 1)
        self.nerr = len(self.error_locations)

        # syndrome = H z (mod 2); same result as adding the columns of H at
        # the error positions one by one
        self.syndrome = self.H.dot(z) % 2

        if self.WBF:  # compute soft syndrome
            self.r = np.abs(y)
            # reliability of a check = least reliable bit taking part in it
            self.phi = np.array([self.r[nb].min() for nb in self.CN_neighbors])
            self.soft_syndrome = (-2*self.syndrome+1) * self.phi
            self._state = self.soft_syndrome
            self.path_penality = - abs(llr) / np.mean(self.absL_avg) * 1.0/self.maxIter
        else:
            self._state = tuple(self.syndrome)
            self.path_penality = - self.absL_avg / np.mean(self.absL_avg) * 1.0/self.maxIter

        self.nmove = 0
        self.totCw += 1
        return self._state

    def step(self, action):  # see http://gym.openai.com/docs/#observations
        """Flip bit ``action`` of the current estimate.

        Returns:
            ``(observation, reward, done, info)``.  The reward is the
            per-bit path penalty of the flipped position, plus 1 when the
            flip brings the syndrome to all-zero.  ``done`` is set when the
            syndrome is all-zero or ``maxIter`` moves have been made.
            ``info`` is always an empty dict.
        """
        self.nmove += 1
        self.chat[action] = (self.chat[action] + 1) % 2
        self.syndrome = (self.syndrome + self.H[:, action]) % 2

        if self.WBF:
            # reliabilities are kept fixed during an episode, so the phi
            # computed in reset() is reused with the updated syndrome signs
            self._state = (-2*self.syndrome+1) * self.phi
        else:
            self._state = tuple(self.syndrome)

        reward = self.path_penality[action]
        done = False
        if self.syndrome.sum() == 0:
            done = True
            reward = reward + 1

        if self.nmove >= self.maxIter:
            done = True

        return self._state, reward, done, {}

    def render(self):
        """Print the move counter, the current estimate and the observation."""
        print('move:',self.nmove,'chat:',self.chat,' state:', self._state)