@@ -920,21 +920,43 @@ def truncate_archive(filetype, date):
     logger.verbose(cmd)


-def read_archfile(i, f, filetype, row, colnames, archfiles, db):
-    """Read filename ``f`` with index ``i`` (position within list of filenames). The
-    file has type ``filetype`` and will be added to MSID file at row index ``row``.
-    ``colnames`` is the list of column names for the content type (not used here).
+def read_archfile(idx_archfile, archfile, filetype, row, colnames, archfiles, db):
+    """Read a FITS file which has been retrieved from the CXCDS archive.
+
+    Parameters
+    ----------
+    idx_archfile : int
+        Index of the archfile in the list of archfiles
+    archfile : str
+        Full path of FITS file to read
+    filetype : dict
+        Filetype dictionary with keys, level, instrum, content, arc5gl_query, fileglob.
+    row : int
+        Row number in the MSID file to start writing data
+    colnames : list of str
+        List of column names for the content type
+    archfiles : numpy structured array
+        Array of archfiles
+
+    Returns
+    -------
+    dat : numpy structured array
+        Data read from the FITS file
+    archfiles_row : dict
+        Row of info about this FILE to insert into archfiles table for content type
     """
     # Check if filename is already in archfiles. If so then abort further processing.
-    filename = os.path.basename(f)
+    filename = os.path.basename(archfile)
     if db.fetchall("SELECT filename FROM archfiles WHERE filename=?", (filename,)):
-        logger.verbose("File %s already in archfiles - unlinking and skipping" % f)
-        os.unlink(f)
+        logger.verbose(
+            "File %s already in archfiles - unlinking and skipping" % archfile
+        )
+        os.unlink(archfile)
         return None, None

     # Read FITS archive file and accumulate data into dats list and header into headers dict
-    logger.info("Reading (%d / %d) %s" % (i, len(archfiles), filename))
-    hdus = pyfits.open(f, character_as_bytes=True)
+    logger.info("Reading (%d / %d) %s" % (idx_archfile, len(archfiles), filename))
+    hdus = pyfits.open(archfile, character_as_bytes=True)
     hdu = hdus[1]

     try:
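The duplicate-file guard at the top of read_archfile is what makes re-running the ingest safe: a file whose basename is already recorded in the archfiles table is unlinked and skipped rather than ingested twice. A minimal standalone sketch of the same pattern, assuming a plain sqlite3 table (the helper name and schema here are illustrative, not the project's actual DB layer):

    import os
    import sqlite3

    def already_ingested(db, archfile):
        """Unlink and skip ``archfile`` if its basename is already ingested."""
        filename = os.path.basename(archfile)
        rows = db.execute(
            "SELECT filename FROM archfiles WHERE filename=?", (filename,)
        ).fetchall()
        if rows:
            os.unlink(archfile)  # duplicate retrieval; drop the local copy
            return True
        return False

    db = sqlite3.connect(":memory:")
    db.execute("CREATE TABLE archfiles (filename TEXT)")
    # already_ingested(db, "/tmp/obs1_eng0.fits") unlinks and returns True
    # once "obs1_eng0.fits" has been inserted into the table.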
@@ -975,14 +997,33 @@ def read_archfile(i, f, filetype, row, colnames, archfiles, db):
     return dat, archfiles_row


-def read_derived(i, filename, filetype, row, colnames, archfiles, db):
+def read_derived(idx_archfile, filename, filetype, row, colnames, archfiles, db):
     """Read derived data using eng_archive and derived computation classes.
-    ``filename`` has format <content>_<index0>_<index1> where <content>
-    is the content type (e.g. "dp_thermal128"), <index0> is the start index for
-    the new data and index1 is the end index (using Python slicing convention
-    index0:index1). Args ``i``, ``filetype``, and ``row`` are as in
-    read_archive(). ``row`` must equal <index0>. ``colnames`` is the list of
-    column names for the content type.
+
+    Parameters
+    ----------
+    idx_archfile : int
+        Index of the archfile in the list of archfiles
+    filename : str
+        File to read with format <content>_<index0>_<index1> where <content> is the
+        content type (e.g. "dp_thermal128"), <index0> is the start index for the new
+        data and <index1> is the end index (using Python slicing convention
+        index0:index1).
+    filetype : dict
+        Filetype dictionary with keys, level, instrum, content, arc5gl_query, fileglob.
+    row : int
+        Row number in the MSID file to start writing data (must equal <index0>)
+    colnames : list of str
+        List of column names for the content type
+    archfiles : numpy structured array
+        Array of archfiles
+
+    Returns
+    -------
+    dat : numpy structured array
+        Data read from the FITS file
+    archfiles_row : dict
+        Row of info about this FILE to insert into archfiles table for content type
     """
     # Check if filename is already in archfiles. If so then abort further processing.
     if db.fetchall("SELECT filename FROM archfiles WHERE filename=?", (filename,)):
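The new docstring pins down the derived-file naming convention: <content>_<index0>_<index1> encodes the slice of rows the file covers, with <index1> exclusive per Python slicing. Since the content type itself may contain underscores (e.g. "dp_thermal128"), splitting from the right is the safe way to recover the indexes. A small sketch (the helper name is made up for illustration):

    def parse_derived_filename(filename):
        """Split '<content>_<index0>_<index1>' into (content, index0, index1)."""
        content, index0, index1 = filename.rsplit("_", 2)
        return content, int(index0), int(index1)

    # index1 is exclusive, i.e. the file covers rows index0:index1
    assert parse_derived_filename("dp_thermal128_0_1000") == ("dp_thermal128", 0, 1000)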
@@ -999,20 +1040,20 @@ def read_derived(i, filename, filetype, row, colnames, archfiles, db):
     time_step = mnf_step * cheta.derived.MNF_TIME
     times = time_step * np.arange(index0, index1)

-    logger.info("Reading (%d / %d) %s" % (i, len(archfiles), filename))
+    logger.info("Reading (%d / %d) %s" % (idx_archfile, len(archfiles), filename))
     vals = {}
     bads = np.zeros((len(times), len(colnames)), dtype=bool)
-    for i, colname in enumerate(colnames):
+    for ii, colname in enumerate(colnames):
         if colname == "TIME":
             vals[colname] = times
-            bads[:, i] = False
+            bads[:, ii] = False
         else:
             dp_class = getattr(cheta.derived, colname.upper())
             dp = dp_class()
             dataset = dp.fetch(times[0] - 1000, times[-1] + 1000)
             ok = (index0 <= dataset.indexes) & (dataset.indexes < index1)
             vals[colname] = dp.calc(dataset)[ok]
-            bads[:, i] = dataset.bads[ok]
+            bads[:, ii] = dataset.bads[ok]

     vals["QUALITY"] = bads
     dat = Ska.Numpy.structured_array(vals, list(colnames) + ["QUALITY"])
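Beyond matching the renamed parameter, switching the loop variable from i to ii removes a shadowing hazard: the old `for i, colname in enumerate(colnames)` rebound the function argument i, so any use of i after the loop would silently see the last column index rather than the archfile index. A toy illustration of the pitfall:

    def process(i, items):
        print("index from caller: %d" % i)  # the parameter, as intended
        for i, item in enumerate(items):  # rebinding clobbers the parameter
            pass
        return i  # now the last loop index, not the caller's argument

    assert process(7, ["a", "b"]) == 1  # would be 7 without the rebinding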
@@ -1059,9 +1100,11 @@ def update_msid_files(filetype, archfiles):

     content_is_derived = filetype["instrum"] == "DERIVED"

-    for i, f in enumerate(archfiles):
+    for idx_archfile, archfile in enumerate(archfiles):
         get_data = read_derived if content_is_derived else read_archfile
-        dat, archfiles_row = get_data(i, f, filetype, row, colnames, archfiles, db)
+        dat, archfiles_row = get_data(
+            idx_archfile, archfile, filetype, row, colnames, archfiles, db
+        )
         if dat is None:
             continue
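This single call site works for both content types because read_archfile and read_derived deliberately share the same signature and the same (dat, archfiles_row) return contract, so the conditional just selects a function object. A minimal sketch of the dispatch pattern, with stub readers standing in for the real ones:

    def read_archfile_stub(idx, name, *rest):
        return {"source": "fits"}, {"filename": name}

    def read_derived_stub(idx, name, *rest):
        return {"source": "derived"}, {"filename": name}

    content_is_derived = True
    get_data = read_derived_stub if content_is_derived else read_archfile_stub
    dat, archfiles_row = get_data(0, "dp_thermal128_0_1000")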
@@ -1154,7 +1197,7 @@ def update_msid_files(filetype, archfiles):
         # subsequent relocation into arch_files archive. In the case of a gap
         # where ingest is stopped before all archfiles are processed, this will
         # leave files in a tmp dir.
-        archfiles_processed.append(f)
+        archfiles_processed.append(archfile)
         if not opt.dry_run:
             db.insert(archfiles_row, "archfiles")
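Appending to archfiles_processed rather than moving each file immediately keeps the relocation all-or-nothing: as the comment notes, an ingest stopped partway leaves the inputs in the tmp dir for the next run. Schematically, under the assumption that the move into the archive happens in a second pass after the loop:

    import pathlib
    import shutil
    import tempfile

    tmp_dir = pathlib.Path(tempfile.mkdtemp())
    arch_dir = pathlib.Path(tempfile.mkdtemp())
    archfiles = [tmp_dir / name for name in ("f1.fits", "f2.fits")]
    for path in archfiles:
        path.touch()

    archfiles_processed = []
    for archfile in archfiles:
        # ... ingest archfile; an exception here leaves it in tmp_dir for retry
        archfiles_processed.append(archfile)

    for archfile in archfiles_processed:  # reached only after the loop completes
        shutil.move(str(archfile), str(arch_dir / archfile.name))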