Added the functions needed to import the PID dictionary, and added a check that re-sorts a message's data when its index is non-monotonic.

2019-10-04 12:31:44 -04:00 · 2019-10-04 12:31:44 -04:00 · ea1bddd625
parent 9f89acdc07
commit ea1bddd625
1 changed files with 35 additions and 3 deletions
--- a/Pipeline_multi-file/PreProcessor.py
+++ b/Pipeline_multi-file/PreProcessor.py
@ -55,18 +55,46 @@ class PreProcessor:
        # print("\nSample of the original data:")
        # print(self.data.head(5), "\n")

+    def import_pid_dict(self, filename):
+        """Read the PID dictionary CSV at `filename` into a DataFrame indexed by pid.
+
+        The file is read without a header row as three columns: pid (converted
+        to int), title (kept as read) and formula (wrapped in a callable of A, B, C, D).
+        """
+        def formula(fx):
+            # SECURITY: eval() executes whatever expression the CSV contains;
+            # only load PID dictionaries from a trusted source.
+            def evaluate(A, B, C, D):
+                return eval(fx)
+            return evaluate
+
+        # Used by pd.read_csv to apply the functions to the respective column vectors in the .csv file
+        convert_dict = {'pid': int, 'title': lambda x: x, 'formula': formula}
+
+        # Name the file that is actually being read by this call.
+        print(f"\nReading in {filename}...")
+
+        return read_csv(filename,
+                        header=None,
+                        names=['pid', 'title', 'formula'],
+                        skiprows=0,
+                        delimiter=',',
+                        converters=convert_dict,
+                        index_col=0)
+
    @staticmethod
-    def generate_j1979_dictionary(j1979_data: DataFrame) -> dict:
+    def generate_j1979_dictionary(j1979_data: DataFrame, pid_dict: DataFrame) -> dict:

        d = {}
        services = j1979_data.groupby('b2')
        for uds_pid, data in services:
-            d[uds_pid] = J1979(uds_pid, data)
+            d[uds_pid] = J1979(uds_pid, data, pid_dict)
        return d

    def generate_arb_id_dictionary(self,
                                   a_timer:                     PipelineTimer,
                                   normalize_strategy:          Callable,
+                                   pid_dict:                    DataFrame,
                                   time_conversion:             int = 1000,
                                   freq_analysis_accuracy:      float = 0.0,
                                   freq_synchronous_threshold:  float = 0.0,
@ -106,7 +134,7 @@ class PreProcessor:
                    j1979_data.drop('dlc', axis=1, inplace=True)
                    j1979_data.drop('id', axis=1, inplace=True)
                    a_timer.start_nested_function_time()
-                    j1979_dictionary = self.generate_j1979_dictionary(j1979_data)
+                    j1979_dictionary = self.generate_j1979_dictionary(j1979_data, pid_dict)
                    a_timer.set_j1979_creation()
                elif arb_id > 0:
                    a_timer.start_iteration_time()
@ -135,6 +163,10 @@ class PreProcessor:
                        correction_mask = this_id.original_data.index.duplicated()
                        this_id.original_data = this_id.original_data[~correction_mask]

+                    # Sort rows whose index is not monotonically increasing (Index.is_monotonic was removed in pandas 2.0)
+                    if not this_id.original_data.index.is_monotonic_increasing:
+                        this_id.original_data = this_id.original_data.sort_index()
+
                    this_id.generate_binary_matrix_and_tang(a_timer, normalize_strategy)
                    this_id.analyze_transmission_frequency(time_convert=time_conversion,
                                                           ci_accuracy=freq_analysis_accuracy,