Merge pull request #7 from frillweeman/master

Support for Argument Parsing and Can-Utils log format
brent-stone 2020-04-22 17:20:47 -05:00 committed by GitHub
commit 547cd67325
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 82 additions and 17 deletions

Pipeline/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
clusters/
figures/
output/

Pipeline/FromCanUtilsLog.py Normal file
View File

@ -0,0 +1,31 @@
import re


def canUtilsToTSV(filename):
    outFileName = filename + ".tsv"
    with open(outFileName, "w") as outFile:
        with open(filename, "r") as file:
            # candump -l format: "(timestamp) interface ID#DATA"
            linePattern = re.compile(
                r"\((\d+\.\d+)\)\s+\S+\s+(.{3})#([0-9A-F]+)")
            while True:
                line = file.readline()
                if not line:
                    return outFileName
                match = linePattern.search(line)
                if not match:
                    # skip blank or malformed lines instead of raising AttributeError
                    continue
                tokens = match.groups()
                # write timestamp
                writeLine = tokens[0]
                # write arb id
                writeLine += '\t' + tokens[1]
                # write dlc (two hex characters per payload byte)
                num_bytes = len(tokens[2]) // 2
                writeLine += '\t' + str(num_bytes)
                # write payload bytes
                for b in range(num_bytes):
                    writeLine += '\t' + tokens[2][b * 2:b * 2 + 2]
                outFile.write(writeLine + '\n')
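For reference, a minimal sketch of the conversion this helper performs on a single candump-style line; the sample frame below is made up:

import re

# One can-utils (candump -l) log line: "(timestamp) interface ID#DATA"
sample = "(1436509052.249713) can0 244#88AA3C1F"
pattern = re.compile(r"\((\d+\.\d+)\)\s+\S+\s+(.{3})#([0-9A-F]+)")
timestamp, arb_id, payload = pattern.search(sample).groups()
dlc = len(payload) // 2  # two hex characters per byte
row = "\t".join([timestamp, arb_id, str(dlc)]
                + [payload[i * 2:i * 2 + 2] for i in range(dlc)])
print(row)  # 1436509052.249713  244  4  88  AA  3C  1F  (tab-separated)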

View File

@ -1,3 +1,4 @@
import argparse
from os import chdir, mkdir, path, remove
from pickle import dump
from sklearn.preprocessing import minmax_scale
@ -8,11 +9,26 @@ from SemanticAnalysis import subset_selection, subset_correlation, greedy_signal
j1979_signal_labeling
from Plotter import plot_j1979, plot_signals_by_arb_id, plot_signals_by_cluster
from PipelineTimer import PipelineTimer
from FromCanUtilsLog import canUtilsToTSV
# File names for the on-disc data input and output.
# Input:
can_data_filename: str = 'drive_runway_afit.log'
# can_data_filename: str = 'loggerProgram0.log'
# get filename from argument parser
parser = argparse.ArgumentParser()
parser.add_argument("filename", nargs='*', type=str,
help="filename of CAN log file")
parser.add_argument(
"-c", "--can-utils", help="read file in Linux can-utils format", action="store_true")
args = parser.parse_args()
# default to "loggerProgram0.log" if no filename is specified by args
can_data_filename = args.filename[0] if args.filename else "loggerProgram0.log"
if args.can_utils:
    # run the converter to produce a TSV file before continuing
    can_data_filename = canUtilsToTSV(can_data_filename)
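A minimal sketch of how the new arguments resolve, using the parser defined above; the argv lists passed to parse_args are illustrative only:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("filename", nargs='*', type=str,
                    help="filename of CAN log file")
parser.add_argument("-c", "--can-utils",
                    help="read file in Linux can-utils format", action="store_true")

# No arguments given: filename stays empty, so the pipeline falls back to "loggerProgram0.log".
args = parser.parse_args([])
assert not args.filename and not args.can_utils

# A can-utils capture: pass the log name plus -c so it is converted to TSV first.
args = parser.parse_args(["drive_runway_afit.log", "-c"])
assert args.filename == ["drive_runway_afit.log"] and args.can_utils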
# Output:
output_folder: str = 'output'
@ -64,7 +80,8 @@ min_correlation_threshold: float = 0.85
a_timer = PipelineTimer(verbose=True)
# DATA IMPORT AND PRE-PROCESSING #
pre_processor = PreProcessor(can_data_filename, pickle_arb_id_filename, pickle_j1979_filename)
pre_processor = PreProcessor(
can_data_filename, pickle_arb_id_filename, pickle_j1979_filename)
id_dictionary, j1979_dictionary = pre_processor.generate_arb_id_dictionary(a_timer,
tang_normalize_strategy,
time_conversion,
@ -88,7 +105,8 @@ signal_dictionary = generate_signals(a_timer,
pickle_signal_filename,
signal_normalize_strategy,
force_lexical_analysis)
plot_signals_by_arb_id(a_timer, id_dictionary, signal_dictionary, force_arb_id_plotting)
plot_signals_by_arb_id(a_timer, id_dictionary,
signal_dictionary, force_arb_id_plotting)
# SEMANTIC ANALYSIS #
print("\n\t\t\t##### BEGINNING SEMANTIC ANALYSIS #####")
@ -97,7 +115,8 @@ subset_df = subset_selection(a_timer,
pickle_subset_filename,
force_semantic_analysis,
subset_size=subset_selection_size)
corr_matrix_subset = subset_correlation(subset_df, csv_correlation_filename, force_semantic_analysis)
corr_matrix_subset = subset_correlation(
subset_df, csv_correlation_filename, force_semantic_analysis)
cluster_dict = greedy_signal_clustering(corr_matrix_subset,
correlation_threshold=min_correlation_threshold,
fuzzy_labeling=fuzzy_labeling)
@ -116,7 +135,8 @@ signal_dictionary, j1979_correlations = j1979_signal_labeling(a_timer=a_timer,
signal_dict=signal_dictionary,
correlation_threshold=min_correlation_threshold,
force=force_signal_labeling)
plot_signals_by_cluster(a_timer, cluster_dict, signal_dictionary, use_j1979_tags_in_plots, force_cluster_plotting)
plot_signals_by_cluster(a_timer, cluster_dict, signal_dictionary,
use_j1979_tags_in_plots, force_cluster_plotting)
# DATA STORAGE #
if dump_to_pickle:
@ -173,7 +193,8 @@ if dump_to_pickle:
print("\tComplete...")
if not path.isfile(pickle_j1979_correlation):
timer_flag += 1
print("\nDumping J1979 correlation DataFrame to " + pickle_j1979_correlation)
print("\nDumping J1979 correlation DataFrame to " +
pickle_j1979_correlation)
dump(j1979_correlations, open(pickle_j1979_correlation, "wb"))
print("\tComplete...")
if not path.isfile(pickle_clusters_filename):
@ -183,15 +204,17 @@ if dump_to_pickle:
print("\tComplete...")
if not path.isfile(pickle_all_signal_filename):
timer_flag += 1
print("\nDumping complete signals DataFrame to " + pickle_all_signal_filename)
print("\nDumping complete signals DataFrame to " +
pickle_all_signal_filename)
dump(df_full, open(pickle_all_signal_filename, "wb"))
print("\tComplete...")
if not path.isfile(csv_all_signals_filename):
timer_flag += 1
print("\nDumping complete correlation matrix to " + csv_all_signals_filename)
print("\nDumping complete correlation matrix to " +
csv_all_signals_filename)
corr_matrix_full.to_csv(csv_all_signals_filename)
print("\tComplete...")
if timer_flag is 9:
if timer_flag == 9:
print("\nDumping pipeline timer to " + pickle_timer_filename)
dump(a_timer, open(pickle_timer_filename, "wb"))
print("\tComplete...")

View File

@ -42,12 +42,15 @@ class PreProcessor:
self.data = read_csv(filename,
header=None,
names=['time', 'id', 'dlc', 'b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'],
names=['time', 'id', 'dlc', 'b0', 'b1',
'b2', 'b3', 'b4', 'b5', 'b6', 'b7'],
skiprows=7,
delimiter='\t',
delim_whitespace=True,
converters=convert_dict,
index_col=0)
print(self.data)
a_timer.set_can_csv_to_df()
# sanity check output of the original data
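A minimal sketch of why the change from delimiter='\t' to delim_whitespace=True matters, assuming pandas: the same reader now accepts both the space-separated logger output and the tab-separated file produced by canUtilsToTSV. The two sample rows below are made up:

from io import StringIO
from pandas import read_csv

# one tab-separated row (canUtilsToTSV output) and one space-separated row
sample = ("0.000\t244\t4\t88\tAA\t3C\t1F\n"
          "0.010 244 4 88 AA 3C 20\n")
df = read_csv(StringIO(sample), header=None,
              names=['time', 'id', 'dlc', 'b0', 'b1', 'b2', 'b3'],
              delim_whitespace=True, index_col=0)
print(df)  # both rows parse into the same columns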
@ -95,11 +98,13 @@ class PreProcessor:
continue
elif arb_id == 2024:
# These are the J1979 responses (response ID 0x7E8 = 2024; requests are broadcast to 0x7DF)
j1979_data = self.data.loc[self.data['id'] == arb_id].copy()
j1979_data = self.data.loc[self.data['id'] == arb_id].copy(
)
j1979_data.drop('dlc', axis=1, inplace=True)
j1979_data.drop('id', axis=1, inplace=True)
a_timer.start_nested_function_time()
j1979_dictionary = self.generate_j1979_dictionary(j1979_data)
j1979_dictionary = self.generate_j1979_dictionary(
j1979_data)
a_timer.set_j1979_creation()
elif arb_id > 0:
a_timer.start_iteration_time()
@ -110,7 +115,7 @@ class PreProcessor:
# Check if the Arbitration ID always used the same DLC. If not, ignore it.
# We can effectively ignore this Arb ID by not adding it to the Arb ID dictionary.
if this_id.original_data['dlc'].nunique() is not 1:
if this_id.original_data['dlc'].nunique() != 1:
continue
this_id.dlc = this_id.original_data['dlc'].iloc[0]
this_id.original_data.drop('dlc', axis=1, inplace=True)
@ -121,14 +126,16 @@ class PreProcessor:
# not actually on the bus.
if this_id.dlc < 8:
for i in range(this_id.dlc, 8):
this_id.original_data.drop('b' + str(i), axis=1, inplace=True)
this_id.original_data.drop(
'b' + str(i), axis=1, inplace=True)
# Check if there are duplicate index values and correct them.
if not this_id.original_data.index.is_unique:
correction_mask = this_id.original_data.index.duplicated()
this_id.original_data = this_id.original_data[~correction_mask]
this_id.generate_binary_matrix_and_tang(a_timer, normalize_strategy)
this_id.generate_binary_matrix_and_tang(
a_timer, normalize_strategy)
this_id.analyze_transmission_frequency(time_convert=time_conversion,
ci_accuracy=freq_analysis_accuracy,
synchronous_threshold=freq_synchronous_threshold)