Added functions necessary to import the pid dictionary, and added a check for non-monotonic values in data.

This commit is contained in:
JoshuaArking 2019-10-04 12:31:44 -04:00
parent 9f89acdc07
commit ea1bddd625
1 changed file with 35 additions and 3 deletions

View File

@ -55,18 +55,46 @@ class PreProcessor:
# print("\nSample of the original data:")
# print(self.data.head(5), "\n")
def import_pid_dict(self, filename) -> DataFrame:
    """Load the PID dictionary CSV into a DataFrame indexed by PID.

    The file is read without a header row; its three comma-separated
    columns are named 'pid', 'title' and 'formula'. Each column is run
    through a converter while parsing.

    Args:
        filename: Location of the CSV file; handed unchanged to
            ``read_csv``.

    Returns:
        A DataFrame whose index is the integer 'pid' column. 'title' holds
        the text as read, and 'formula' holds one callable per row taking
        four positional arguments ``(A, B, C, D)`` and returning the value
        of that row's formula expression.
    """
    def pid(x):
        return int(x)

    def title(x):
        return x

    def formula(fx):
        # SECURITY: the formula text comes from the CSV file and is passed
        # to eval() each time the returned callable is invoked. Only load
        # PID dictionaries from trusted sources. The parameter names
        # A, B, C, D must stay as they are: the expression refers to them.
        return lambda A, B, C, D: eval(fx)

    # Used by read_csv to apply the functions to the respective column
    # vectors in the .csv file.
    convert_dict = {'pid': pid, 'title': title, 'formula': formula}

    # Report the file actually being read (not the CAN data file name).
    print(f"\nReading in {filename}...")

    return read_csv(filename,
                    header=None,
                    names=['pid', 'title', 'formula'],
                    skiprows=0,
                    delimiter=',',
                    converters=convert_dict,
                    index_col=0)
@staticmethod
def generate_j1979_dictionary(j1979_data: DataFrame, pid_dict: DataFrame) -> dict:
    """Build one J1979 object per distinct value of the 'b2' column.

    Args:
        j1979_data: Rows to split; grouped by the 'b2' column.
        pid_dict: Passed through unchanged as the third argument of every
            ``J1979`` constructed here.

    Returns:
        A dict mapping each 'b2' group key to
        ``J1979(key, rows_of_that_group, pid_dict)``, in groupby order.
    """
    return {
        uds_pid: J1979(uds_pid, data, pid_dict)
        for uds_pid, data in j1979_data.groupby('b2')
    }
def generate_arb_id_dictionary(self,
a_timer: PipelineTimer,
normalize_strategy: Callable,
pid_dict: DataFrame,
time_conversion: int = 1000,
freq_analysis_accuracy: float = 0.0,
freq_synchronous_threshold: float = 0.0,
@ -106,7 +134,7 @@ class PreProcessor:
j1979_data.drop('dlc', axis=1, inplace=True)
j1979_data.drop('id', axis=1, inplace=True)
a_timer.start_nested_function_time()
j1979_dictionary = self.generate_j1979_dictionary(j1979_data)
j1979_dictionary = self.generate_j1979_dictionary(j1979_data, pid_dict)
a_timer.set_j1979_creation()
elif arb_id > 0:
a_timer.start_iteration_time()
@ -135,6 +163,10 @@ class PreProcessor:
correction_mask = this_id.original_data.index.duplicated()
this_id.original_data = this_id.original_data[~correction_mask]
# Check for non-monotonic values and sort them to be monotonic
if not this_id.original_data.index.is_monotonic:
this_id.original_data.sort_index(inplace=True)
this_id.generate_binary_matrix_and_tang(a_timer, normalize_strategy)
this_id.analyze_transmission_frequency(time_convert=time_conversion,
ci_accuracy=freq_analysis_accuracy,