Merge pull request #7 from frillweeman/master

Support for Argument Parsing and Can-Utils log format
brent-stone 2020-04-22 17:20:47 -05:00 committed by GitHub
commit 547cd67325
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 82 additions and 17 deletions

Pipeline/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
clusters/
figures/
output/

Pipeline/FromCanUtilsLog.py Normal file
View File

@ -0,0 +1,31 @@
import re


def canUtilsToTSV(filename):
    outFileName = filename + ".tsv"
    with open(outFileName, "w") as outFile:
        with open(filename, "r") as file:
            # candump -l format: "(timestamp) interface ID#DATA"
            linePattern = re.compile(
                r"\((\d+\.\d+)\)\s+\S+\s+(.{3})#([0-9A-F]+)")
            while True:
                line = file.readline()
                if not line:
                    return outFileName
                match = linePattern.search(line)
                if not match:
                    # skip blank or malformed lines instead of raising AttributeError
                    continue
                tokens = match.groups()
                # write timestamp
                writeLine = tokens[0]
                # write arb id
                writeLine += '\t' + tokens[1]
                # write dlc (two hex characters per payload byte)
                num_bytes = len(tokens[2]) // 2
                writeLine += '\t' + str(num_bytes)
                # write payload bytes
                for b in range(num_bytes):
                    writeLine += '\t' + tokens[2][b * 2:b * 2 + 2]
                outFile.write(writeLine + '\n')
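For reference, a minimal sketch of the conversion this helper performs on a single candump-style line; the sample frame below is made up:

import re

# One can-utils (candump -l) log line: "(timestamp) interface ID#DATA"
sample = "(1436509052.249713) can0 244#88AA3C1F"
pattern = re.compile(r"\((\d+\.\d+)\)\s+\S+\s+(.{3})#([0-9A-F]+)")
timestamp, arb_id, payload = pattern.search(sample).groups()
dlc = len(payload) // 2  # two hex characters per byte
row = "\t".join([timestamp, arb_id, str(dlc)]
                + [payload[i * 2:i * 2 + 2] for i in range(dlc)])
print(row)  # 1436509052.249713  244  4  88  AA  3C  1F  (tab-separated)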

View File

@ -1,3 +1,4 @@
import argparse
from os import chdir, mkdir, path, remove
from pickle import dump
from sklearn.preprocessing import minmax_scale
@ -8,11 +9,26 @@ from SemanticAnalysis import subset_selection, subset_correlation, greedy_signal
j1979_signal_labeling
from Plotter import plot_j1979, plot_signals_by_arb_id, plot_signals_by_cluster
from PipelineTimer import PipelineTimer
from FromCanUtilsLog import canUtilsToTSV
# File names for the on-disc data input and output.
# Input:
can_data_filename: str = 'drive_runway_afit.log'
# can_data_filename: str = 'loggerProgram0.log'
# get filename from argument parser
parser = argparse.ArgumentParser()
parser.add_argument("filename", nargs='*', type=str,
help="filename of CAN log file")
parser.add_argument(
"-c", "--can-utils", help="read file in Linux can-utils format", action="store_true")
args = parser.parse_args()
# default to "loggerProgram0.log" if no filename is specified by args
can_data_filename = args.filename[0] if args.filename else "loggerProgram0.log"
if args.can_utils:
    # run the converter to produce a TSV file before continuing
    can_data_filename = canUtilsToTSV(can_data_filename)
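A minimal sketch of how the new arguments resolve, using the parser defined above; the argv lists passed to parse_args are illustrative only:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("filename", nargs='*', type=str,
                    help="filename of CAN log file")
parser.add_argument("-c", "--can-utils",
                    help="read file in Linux can-utils format", action="store_true")

# No arguments given: filename stays empty, so the pipeline falls back to "loggerProgram0.log".
args = parser.parse_args([])
assert not args.filename and not args.can_utils

# A can-utils capture: pass the log name plus -c so it is converted to TSV first.
args = parser.parse_args(["drive_runway_afit.log", "-c"])
assert args.filename == ["drive_runway_afit.log"] and args.can_utils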
# Output:
output_folder: str = 'output'
@ -64,7 +80,8 @@ min_correlation_threshold: float = 0.85
a_timer = PipelineTimer(verbose=True)
# DATA IMPORT AND PRE-PROCESSING #
pre_processor = PreProcessor(can_data_filename, pickle_arb_id_filename, pickle_j1979_filename)
pre_processor = PreProcessor(
can_data_filename, pickle_arb_id_filename, pickle_j1979_filename)
id_dictionary, j1979_dictionary = pre_processor.generate_arb_id_dictionary(a_timer,
tang_normalize_strategy,
time_conversion,
@ -88,7 +105,8 @@ signal_dictionary = generate_signals(a_timer,
pickle_signal_filename,
signal_normalize_strategy,
force_lexical_analysis)
plot_signals_by_arb_id(a_timer, id_dictionary, signal_dictionary, force_arb_id_plotting)
plot_signals_by_arb_id(a_timer, id_dictionary,
signal_dictionary, force_arb_id_plotting)
# SEMANTIC ANALYSIS #
print("\n\t\t\t##### BEGINNING SEMANTIC ANALYSIS #####")
@ -97,7 +115,8 @@ subset_df = subset_selection(a_timer,
pickle_subset_filename,
force_semantic_analysis,
subset_size=subset_selection_size)
corr_matrix_subset = subset_correlation(subset_df, csv_correlation_filename, force_semantic_analysis)
corr_matrix_subset = subset_correlation(
subset_df, csv_correlation_filename, force_semantic_analysis)
cluster_dict = greedy_signal_clustering(corr_matrix_subset,
correlation_threshold=min_correlation_threshold,
fuzzy_labeling=fuzzy_labeling)
@ -116,7 +135,8 @@ signal_dictionary, j1979_correlations = j1979_signal_labeling(a_timer=a_timer,
signal_dict=signal_dictionary,
correlation_threshold=min_correlation_threshold,
force=force_signal_labeling)
plot_signals_by_cluster(a_timer, cluster_dict, signal_dictionary, use_j1979_tags_in_plots, force_cluster_plotting)
plot_signals_by_cluster(a_timer, cluster_dict, signal_dictionary,
use_j1979_tags_in_plots, force_cluster_plotting)
# DATA STORAGE #
if dump_to_pickle:
@ -173,7 +193,8 @@ if dump_to_pickle:
print("\tComplete...")
if not path.isfile(pickle_j1979_correlation):
timer_flag += 1
print("\nDumping J1979 correlation DataFrame to " + pickle_j1979_correlation)
print("\nDumping J1979 correlation DataFrame to " +
pickle_j1979_correlation)
dump(j1979_correlations, open(pickle_j1979_correlation, "wb"))
print("\tComplete...")
if not path.isfile(pickle_clusters_filename):
@ -183,15 +204,17 @@ if dump_to_pickle:
print("\tComplete...")
if not path.isfile(pickle_all_signal_filename):
timer_flag += 1
print("\nDumping complete signals DataFrame to " + pickle_all_signal_filename)
print("\nDumping complete signals DataFrame to " +
pickle_all_signal_filename)
dump(df_full, open(pickle_all_signal_filename, "wb"))
print("\tComplete...")
if not path.isfile(csv_all_signals_filename):
timer_flag += 1
print("\nDumping complete correlation matrix to " + csv_all_signals_filename)
print("\nDumping complete correlation matrix to " +
csv_all_signals_filename)
corr_matrix_full.to_csv(csv_all_signals_filename)
print("\tComplete...")
if timer_flag is 9:
if timer_flag == 9:
print("\nDumping pipeline timer to " + pickle_timer_filename)
dump(a_timer, open(pickle_timer_filename, "wb"))
print("\tComplete...")

View File

@ -42,12 +42,15 @@ class PreProcessor:
self.data = read_csv(filename,
header=None,
names=['time', 'id', 'dlc', 'b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'],
names=['time', 'id', 'dlc', 'b0', 'b1',
'b2', 'b3', 'b4', 'b5', 'b6', 'b7'],
skiprows=7,
delimiter='\t',
delim_whitespace=True,
converters=convert_dict,
index_col=0)
print(self.data)
a_timer.set_can_csv_to_df()
# sanity check output of the original data
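A minimal sketch of why the change from delimiter='\t' to delim_whitespace=True matters, assuming pandas: the same reader now accepts both the space-separated logger output and the tab-separated file produced by canUtilsToTSV. The two sample rows below are made up:

from io import StringIO
from pandas import read_csv

# one tab-separated row (canUtilsToTSV output) and one space-separated row
sample = ("0.000\t244\t4\t88\tAA\t3C\t1F\n"
          "0.010 244 4 88 AA 3C 20\n")
df = read_csv(StringIO(sample), header=None,
              names=['time', 'id', 'dlc', 'b0', 'b1', 'b2', 'b3'],
              delim_whitespace=True, index_col=0)
print(df)  # both rows parse into the same columns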
@ -95,11 +98,13 @@ class PreProcessor:
continue
elif arb_id == 2024:
# These are the J1979 responses (response ID 0x7E8 = 2024; requests are broadcast to 0x7DF)
j1979_data = self.data.loc[self.data['id'] == arb_id].copy()
j1979_data = self.data.loc[self.data['id'] == arb_id].copy(
)
j1979_data.drop('dlc', axis=1, inplace=True)
j1979_data.drop('id', axis=1, inplace=True)
a_timer.start_nested_function_time()
j1979_dictionary = self.generate_j1979_dictionary(j1979_data)
j1979_dictionary = self.generate_j1979_dictionary(
j1979_data)
a_timer.set_j1979_creation()
elif arb_id > 0:
a_timer.start_iteration_time()
@ -110,7 +115,7 @@ class PreProcessor:
# Check if the Arbitration ID always used the same DLC. If not, ignore it.
# We can effectively ignore this Arb ID by not adding it to the Arb ID dictionary.
if this_id.original_data['dlc'].nunique() is not 1:
if this_id.original_data['dlc'].nunique() != 1:
continue
this_id.dlc = this_id.original_data['dlc'].iloc[0]
this_id.original_data.drop('dlc', axis=1, inplace=True)
@ -121,14 +126,16 @@ class PreProcessor:
# not actually on the bus.
if this_id.dlc < 8:
for i in range(this_id.dlc, 8):
this_id.original_data.drop('b' + str(i), axis=1, inplace=True)
this_id.original_data.drop(
'b' + str(i), axis=1, inplace=True)
# Check if there are duplicate index values and correct them.
if not this_id.original_data.index.is_unique:
correction_mask = this_id.original_data.index.duplicated()
this_id.original_data = this_id.original_data[~correction_mask]
this_id.generate_binary_matrix_and_tang(a_timer, normalize_strategy)
this_id.generate_binary_matrix_and_tang(
a_timer, normalize_strategy)
this_id.analyze_transmission_frequency(time_convert=time_conversion,
ci_accuracy=freq_analysis_accuracy,
synchronous_threshold=freq_synchronous_threshold)