First Commit. Details to follow; I'm still learning.
parent ca184a56de
commit 3749d4f303
@@ -0,0 +1,2 @@
+# Default ignored files
+/workspace.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/Pipeline_multi-file" isTestSource="false" />
+      <excludeFolder url="file://$MODULE_DIR$/Example_Pipeline_Output" />
+      <excludeFolder url="file://$MODULE_DIR$/Pipeline" />
+      <excludeFolder url="file://$MODULE_DIR$/R" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.7 (CAN_Reverse_Engineering)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+  </component>
+</module>
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (CAN_Reverse_Engineering)" project-jdk-type="Python SDK" />
+</project>
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/CAN_Reverse_Engineering_git.iml" filepath="$PROJECT_DIR$/.idea/CAN_Reverse_Engineering_git.iml" />
+    </modules>
+  </component>
+</project>
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
@@ -9,11 +9,21 @@ from SemanticAnalysis import subset_selection, subset_correlation, greedy_signal
 from Plotter import plot_j1979, plot_signals_by_arb_id, plot_signals_by_cluster
 from PipelineTimer import PipelineTimer
 
+i = 0
+j = 0
 # File names for the on-disc data input and output.
 # Input:
-can_data_filename: str = 'drive_runway_afit.log'
-# can_data_filename: str = 'loggerProgram0.log'
+#can_data_filename: str = 'drive_runway_afit.log'
+can_data_filename: str = 'loggerProgram0.log'
 
+while i < 51:
+    if i == 50 and j < 50: #i need to optimize this and redesign it
+        j += 1
+        i = 0
+    elif i == 50 and j == 50:
+        i = 51
+    else:
+        i += 1
 # Output:
 output_folder: str = 'output'
 pickle_arb_id_filename: str = 'pickleArbIDs.p'
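As committed, this loop runs to completion before anything below it executes, so the thresholds that later reference i and j are evaluated exactly once, with i = 51 and j = 50; the in-line comment concedes the design needs rework. A sketch of an actual grid sweep, assuming the pipeline body were wrapped in a hypothetical run_pipeline() function:

```python
from itertools import product

# Hypothetical run_pipeline() wraps everything below the threshold
# definitions; each (j, i) pair reruns the pipeline with new thresholds.
for j, i in product(range(51), range(51)):
    run_pipeline(tokenization_bit_distance=i / 100,  # swept 0.00 .. 0.50
                 subset_selection_size=j / 100)      # swept 0.00 .. 0.50
```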
@@ -33,15 +43,15 @@ signal_normalize_strategy: Callable = minmax_scale
 
 # Turn on or off portions of the pipeline and output methods using these flags.
 force_pre_processing: bool = False
-force_j1979_plotting: bool = False
+force_j1979_plotting: bool = True
 
-force_lexical_analysis: bool = False
+force_lexical_analysis: bool = True
 force_arb_id_plotting: bool = True
 
-force_semantic_analysis: bool = False
-force_signal_labeling: bool = False
+force_semantic_analysis: bool = True
+force_signal_labeling: bool = True
 use_j1979_tags_in_plots: bool = True
-force_cluster_plotting: bool = False
+force_cluster_plotting: bool = True
 
 dump_to_pickle: bool = True
 
@@ -51,12 +61,14 @@ z_lookup = {.8: 1.28, .9: 1.645, .95: 1.96, .98: 2.33, .99: 2.58}
 freq_analysis_accuracy = z_lookup[0.9]
 freq_synchronous_threshold = 0.1
 
-# Threshold parameters used during lexical analysis.
-tokenization_bit_distance: float = 0.2
+# Threshold parameters used during lexical analysis. Default is 0.2
+tokenization_bit_distance: float = i/100
 tokenize_padding: bool = True
 
-# Threshold parameters used during semantic analysis
-subset_selection_size: float = 0.25
+
+
+# Threshold parameters used during semantic analysis Default is 0.25 and 0.85
+subset_selection_size: float = j/100
 fuzzy_labeling: bool = True
 min_correlation_threshold: float = 0.85
 
@@ -88,7 +100,7 @@ signal_dictionary = generate_signals(a_timer,
                                      pickle_signal_filename,
                                      signal_normalize_strategy,
                                      force_lexical_analysis)
-plot_signals_by_arb_id(a_timer, id_dictionary, signal_dictionary, force_arb_id_plotting)
+plot_signals_by_arb_id(a_timer, id_dictionary, signal_dictionary, i, force_arb_id_plotting)
 
 # SEMANTIC ANALYSIS #
 print("\n\t\t\t##### BEGINNING SEMANTIC ANALYSIS #####")
@@ -116,7 +128,7 @@ signal_dictionary, j1979_correlations = j1979_signal_labeling(a_timer=a_timer,
                                                               signal_dict=signal_dictionary,
                                                               correlation_threshold=min_correlation_threshold,
                                                               force=force_signal_labeling)
-plot_signals_by_cluster(a_timer, cluster_dict, signal_dictionary, use_j1979_tags_in_plots, force_cluster_plotting)
+plot_signals_by_cluster(a_timer, cluster_dict, signal_dictionary, use_j1979_tags_in_plots, i, force_cluster_plotting)
 
 # DATA STORAGE #
 if dump_to_pickle:
@@ -16,7 +16,10 @@ cluster_folder: str = 'clusters'
 j1979_folder: str = 'j1979'
 
 
-def plot_signals_by_arb_id(a_timer: PipelineTimer, arb_id_dict: dict, signal_dict: dict, force: bool=False):
+def plot_signals_by_arb_id(a_timer: PipelineTimer, arb_id_dict: dict, signal_dict: dict, settings: int, force: bool = False):
+    arb_id_folder = 'figures' + str(settings)
+
+
     if path.exists(arb_id_folder):
         if force:
             rmtree(arb_id_folder)
@@ -29,7 +32,7 @@ def plot_signals_by_arb_id(a_timer: PipelineTimer, arb_id_dict: dict, signal_dic
     for k_id, signals in signal_dict.items():
         arb_id = arb_id_dict[k_id]
         if not arb_id.static:
-            print("Plotting Arb ID " + str(k_id) + " (" + str(hex(k_id)) + ")")
+            print(str(settings) + "Plotting Arb ID " + str(k_id) + " (" + str(hex(k_id)) + ")")
             a_timer.start_iteration_time()
 
             signals_to_plot = []
@@ -99,7 +102,9 @@ def plot_signals_by_cluster(a_timer: PipelineTimer,
                             cluster_dict: dict,
                             signal_dict: dict,
                             use_j1979_tags: bool,
+                            settings: int,
                             force: bool=False):
+    cluster_folder = 'cluster' + str(settings)
     if path.exists(cluster_folder):
         if force:
             rmtree(cluster_folder)
@@ -44,7 +44,7 @@ class PreProcessor:
                              header=None,
                              names=['time', 'id', 'dlc', 'b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'],
                              skiprows=7,
-                             delimiter='\t',
+                             delimiter=' ',
                              converters=convert_dict,
                              index_col=0)
 
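The delimiter change from '\t' to ' ' suggests the captured log files are space-separated rather than tab-separated. A minimal, self-contained sketch of the resulting parse (converters omitted here; convert_dict is defined elsewhere in PreProcessor):

```python
from pandas import read_csv

# Space-delimited logger output: 7 header rows skipped, time used as index.
frame = read_csv('loggerProgram0.log',
                 header=None,
                 names=['time', 'id', 'dlc', 'b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'],
                 skiprows=7,
                 delimiter=' ',
                 index_col=0)
```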
@@ -60,6 +60,7 @@ class FileBoi:
                 # Check if this file name matches the expected name for a CAN data sample. If so, create new Sample
                 m = re.match('loggerProgram[\d]+.log', file)
                 if m:
+                    i = 0
                     if not (make, model, year) in sample_dict:
                         sample_dict[(make, model, year)] = []
                     this_sample_index = str(len(sample_dict[(make, model, year)]))
@@ -0,0 +1,130 @@
+from numpy import float64, nditer, uint64, zeros, ndarray, inf
+from pandas import Series, DataFrame
+from os import path, remove
+from pickle import load
+from ArbID import ArbID
+from Signal import Signal
+from PipelineTimer import PipelineTimer
+from typing import List
+from scipy import integrate
+
+
+def transform_signal(a_timer: PipelineTimer,
+                     arb_id_dict: dict,
+                     signal_dict: dict,
+                     transform_pickle_filename: str,
+                     normalize_strategy,
+                     given_arb_id: int,
+                     force=False):
+    if force and path.isfile(transform_pickle_filename):
+        remove(transform_pickle_filename)
+    if path.isfile(transform_pickle_filename):
+        print("\nSignal transformation already completed and forcing is turned off. Using pickled data...")
+        return load(open(transform_pickle_filename, "rb"))
+
+    a_timer.start_function_time()
+
+    transform_dict = signal_dict
+
+    # arb_id_dict[given_arb_id * 256] = ArbID(given_arb_id * 256)
+
+    for k, arb_id in arb_id_dict.items():
+        # print(str(arb_id.id) + " == " + str(given_arb_id) + " ?\n")
+        if arb_id.id == given_arb_id:
+            arb_id.static = False
+            arb_id.short = False
+            if not arb_id.static:
+                for token in arb_id.tokenization:
+                    a_timer.start_iteration_time()
+
+                    signal = Signal(k * 256, token[0], token[1])
+                    signal.static = False
+
+
+
+                    # Convert the binary ndarray to a list of string representations of each row
+                    temp1 = [''.join(str(x) for x in row) for row in arb_id.boolean_matrix[:, token[0]:token[1] + 1]]
+                    temp2 = zeros((temp1.__len__()+1), dtype=uint64)
+                    # convert each string representation to int
+                    for i, row in enumerate(temp1):
+                        temp2[i] = int(row, 2)
+
+                    temp3 = integrate.cumtrapz(temp2)
+                    print("Arb Id " + str(k) + ", Signal from " + str(token[0]) + " to " + str(token[1]) + " Integrated successfully")
+
+
+
+                    # create an unsigned integer pandas.Series using the time index from this Arb ID's original data.
+                    signal.time_series = Series(temp3[:], index=arb_id.original_data.index, dtype=float64)
+
+
+
+                    # Normalize the signal and update its meta-data
+                    signal.normalize_and_set_metadata(normalize_strategy)
+                    # add this signal to the signal dictionary which is keyed by Arbitration ID
+                    if (k * 256) in transform_dict:
+                        transform_dict[k * 256][(arb_id.id * 256, signal.start_index, signal.stop_index)] = signal
+                    else:
+                        print("Successfully added at transform dict")
+                        transform_dict[k * 256] = {(arb_id.id * 256, signal.start_index, signal.stop_index): signal}
+
+                    a_timer.set_token_to_signal()
+
+    a_timer.set_signal_generation()
+
+    return transform_dict
+
+
+def transform_signals(a_timer: PipelineTimer,
+                      arb_id_dict: dict,
+                      transform_pickle_filename: str,
+                      normalize_strategy,
+                      force=False):
+    if force and path.isfile(transform_pickle_filename):
+        remove(transform_pickle_filename)
+    if path.isfile(transform_pickle_filename):
+        print("\nSignal transformation already completed and forcing is turned off. Using pickled data...")
+        return load(open(transform_pickle_filename, "rb"))
+
+    a_timer.start_function_time()
+
+    transform_dict = {}  # arb_id_dict
+
+    for k, arb_id in arb_id_dict.items():
+        if not arb_id.static:
+            for token in arb_id.tokenization:
+                a_timer.start_iteration_time()
+
+                signal = Signal(k * 256, token[0], token[1])
+
+
+
+                # Convert the binary ndarray to a list of string representations of each row
+                temp1 = [''.join(str(x) for x in row) for row in arb_id.boolean_matrix[:, token[0]:token[1] + 1]]
+                temp2 = zeros((temp1.__len__()+1), dtype=uint64)
+                # convert each string representation to int
+                for i, row in enumerate(temp1):
+                    temp2[i] = int(row, 2)
+
+                temp3 = integrate.cumtrapz(temp2)
+
+
+
+                # create an unsigned integer pandas.Series using the time index from this Arb ID's original data.
+                signal.time_series = Series(temp3[:], index=arb_id.original_data.index, dtype=float64)
+
+
+
+                # Normalize the signal and update its meta-data
+                signal.normalize_and_set_metadata(normalize_strategy)
+                # add this signal to the signal dictionary which is keyed by Arbitration ID
+                if k in transform_dict:
+                    transform_dict[k][(arb_id.id, signal.start_index, signal.stop_index)] = signal
+                else:
+                    transform_dict[k] = {(arb_id.id, signal.start_index, signal.stop_index): signal}
+
+                a_timer.set_token_to_signal()
+
+    a_timer.set_signal_generation()
+
+    return transform_dict
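The new module re-derives each tokenized signal as the cumulative trapezoidal integral of its raw token values (e.g., integrating a speed signal approximates distance traveled). The core per-token transform, restated as a minimal sketch (assumptions: boolean_matrix is an N x 64 ndarray of 0/1 values, token is an inclusive (start_bit, stop_bit) pair):

```python
from numpy import uint64, zeros
from scipy import integrate


def integrate_token(boolean_matrix, token):
    # Join each row's bit slice into a string, e.g. [1, 0, 1] -> "101".
    rows = [''.join(str(x) for x in row)
            for row in boolean_matrix[:, token[0]:token[1] + 1]]
    # N + 1 slots: cumtrapz returns len - 1 samples, so the result has
    # exactly N values -- one per CAN frame -- matching the time index.
    values = zeros(len(rows) + 1, dtype=uint64)
    for i, row in enumerate(rows):
        values[i] = int(row, 2)  # parse the bit string as base 2
    return integrate.cumtrapz(values)
```

Also worth noting: transform_signal mutates the signal_dict it is given (transform_dict = signal_dict binds the same object) and stores transformed copies under k * 256 so they cannot collide with untransformed arbitration IDs.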
@@ -5,14 +5,22 @@ from Sample import Sample
 # Cross validation parameters for finding an optimal tokenization inversion distance threshold -- NOT WORKING?
 kfold_n: int = 5
 current_vehicle_number = 0
+known_speed_arb_id = 514
 
 good_boi = FileBoi()
 samples = good_boi.go_fetch(kfold_n)
 for key, sample_list in samples.items(): # type: tuple, list
     for sample in sample_list: # type: Sample
+        print(current_vehicle_number)
+
+        # sample.tang_inversion_bit_dist += (0.01 * current_vehicle_number)
+        # sample.max_inter_cluster_dist += (0.01 * current_vehicle_number)
+        # sample.tang_inversion_bit_dist = round(sample.tang_inversion_bit_dist, 2) # removes floating point errors
+        # sample.max_inter_cluster_dist = round(sample.max_inter_cluster_dist, 2)
+        # print("\n\t##### Settings are " + str(sample.tang_inversion_bit_dist) + " and " + str(
+        #     sample.max_inter_cluster_dist) + " #####")
 
         print("\nData import and Pre-Processing for " + sample.output_vehicle_dir)
-        id_dict, j1979_dict = sample.pre_process()
+        id_dict, j1979_dict = sample.pre_process(known_speed_arb_id)
         if j1979_dict:
             sample.plot_j1979(j1979_dict, vehicle_number=str(current_vehicle_number))
 
@@ -25,14 +33,22 @@ for key, sample_list in samples.items(): # type: tuple, list
         print("\n\t##### BEGINNING LEXICAL ANALYSIS OF " + sample.output_vehicle_dir + " #####")
         sample.tokenize_dictionary(id_dict)
         signal_dict = sample.generate_signals(id_dict, bool(j1979_dict))
-        sample.plot_arb_ids(id_dict, signal_dict, vehicle_number=str(current_vehicle_number))
+        # sample.plot_arb_ids(id_dict, signal_dict, vehicle_number=str(current_vehicle_number))
+
-        # LEXICAL ANALYSIS #
+        # KNOWN SIGNAL ANALYSIS #
+        print("\n\t##### BEGINNING KNOWN SIGNAL ANALYSIS OF " + sample.output_vehicle_dir + " #####")
+        transform_dict= sample.transform_signal(id_dict, signal_dict, known_speed_arb_id)
+        sample.plot_arb_ids(id_dict, transform_dict, vehicle_number=str(current_vehicle_number))
+
+
         # SEMANTIC ANALYSIS #
         print("\n\t##### BEGINNING SEMANTIC ANALYSIS OF " + sample.output_vehicle_dir + " #####")
-        corr_matrix, combined_df = sample.generate_correlation_matrix(signal_dict)
+        corr_matrix, combined_df = sample.generate_correlation_matrix(transform_dict)
         if j1979_dict:
-            signal_dict, j1979_correlation = sample.j1979_labeling(j1979_dict, signal_dict, combined_df)
+            transform_dict, j1979_correlation = sample.j1979_labeling(j1979_dict, transform_dict, combined_df)
         cluster_dict, linkage_matrix = sample.cluster_signals(corr_matrix)
-        sample.plot_clusters(cluster_dict, signal_dict, bool(j1979_dict), vehicle_number=str(current_vehicle_number))
+        # sample.plot_clusters(cluster_dict, signal_dict, bool(j1979_dict), vehicle_number=str(current_vehicle_number))
+        sample.plot_known_signal_cluster(cluster_dict, signal_dict, bool(j1979_dict), known_speed_arb_id, vehicle_number=str(current_vehicle_number))
         sample.plot_dendrogram(linkage_matrix, vehicle_number=str(current_vehicle_number))
         current_vehicle_number += 1
+
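The multi-file pipeline now anchors the analysis to a known speed signal (arb ID 514, i.e. 0x202): it is duplicated and integrated during pre-processing, the transformed dictionary drives correlation and clustering, and only clusters containing the known ID or its * 256 alias get plotted. The membership test used by plot_known_signal_cluster (see the Plotter hunk further down), restated as a standalone predicate with a hypothetical name:

```python
def cluster_contains_known_id(list_of_signals, given_arb_id):
    # Signal keys are tuples whose first element is the arb ID; the
    # integrated copy of a known ID k is stored under the alias k * 256.
    return any(v[0] == given_arb_id or v[0] == given_arb_id * 256
               for v in list_of_signals)
```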
@@ -25,13 +25,13 @@ def plot_signals_by_arb_id(a_timer: PipelineTimer, arb_id_dict: dict, signal_dic
             rmtree(arb_id_folder)
         else:
             print("\nArbID plotting appears to have already been done and forcing is turned off. Skipping...")
-            return
+            # return
 
     a_timer.start_function_time()
 
     for k_id, signals in signal_dict.items():
         arb_id = arb_id_dict[k_id]
-        if not arb_id.static and not arb_id.short:
+        if (not arb_id.static and not arb_id.short) or k_id == 155136:
             print("Plotting Arb ID " + str(k_id) + " (" + str(hex(k_id)) + ") for Vehicle " + vehicle_number)
             a_timer.start_iteration_time()
 
@@ -85,7 +85,7 @@ def plot_signals_by_arb_id(a_timer: PipelineTimer, arb_id_dict: dict, signal_dic
         chdir(arb_id_folder)
 
         # If you want transparent backgrounds, a different file format, etc. then change these settings accordingly.
-        savefig(hex(arb_id.id) + "." + figure_format,
+        savefig(hex(signal.arb_id) + "." + figure_format,
                 bbox_iches='tight',
                 pad_inches=0.0,
                 dpi=figure_dpi,
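One caveat in this savefig call (pre-existing, not introduced here): bbox_iches is presumably a typo for matplotlib's bbox_inches keyword, so the tight bounding box is most likely never applied. The recognized spelling, in a self-contained example:

```python
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1], color='black')
# bbox_inches (not "bbox_iches") is the keyword matplotlib recognizes.
fig.savefig('example.png', bbox_inches='tight', pad_inches=0.0, dpi=300)
```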
@@ -311,3 +311,162 @@ def plot_dendrogram(a_timer: PipelineTimer,
             transparent=figure_transp)
     plt.close()
     print("\t\tComplete...")
+
+
+def plot_known_signal_cluster(a_timer: PipelineTimer,
+                              cluster_dict: dict,
+                              signal_dict: dict,
+                              use_j1979_tags: bool,
+                              vehicle_number: str,
+                              given_arb_id: int,
+                              force: bool = False):
+    if path.exists(cluster_folder):
+        if force:
+            rmtree(cluster_folder)
+        else:
+            print("\nCluster plotting appears to have already been done and forcing is turned off. Skipping...")
+            return
+
+    a_timer.start_function_time()
+
+    print("\n")
+    for cluster_number, list_of_signals in cluster_dict.items():
+        if [v for i, v in enumerate(list_of_signals) if (v[0] == given_arb_id or v[0] == given_arb_id * 256)]:
+            print("Plotting cluster", cluster_number, "with " + str(len(list_of_signals)) + " signals.")
+            a_timer.start_iteration_time()
+
+            # Setup the plot
+            fig, axes = plt.subplots(nrows=len(list_of_signals), ncols=1, squeeze=False)
+            plt.suptitle("Signal Cluster " + str(cluster_number) + " from Vehicle " + vehicle_number,
+                         weight='bold',
+                         position=(0.5, 1))
+            fig.set_size_inches(8, (1 + len(list_of_signals)+1) * 1.3)
+
+            size_adjust = len(list_of_signals) / 100
+            # The min() statement provides whitespace for the suptitle depending on the number of subplots.
+            plt.tight_layout(h_pad=1, rect=(0, 0, 1, min(0.985, 0.93 + size_adjust)))
+            # This adjusts whitespace padding on the left and right of the subplots
+            fig.subplots_adjust(left=0.07, right=0.98)
+
+            # Plot the time series of each signal in the cluster
+            for i, signal_key in enumerate(list_of_signals):
+                signal = signal_dict[signal_key[0]][signal_key]
+                ax = axes[i, 0]
+                if signal.j1979_title and use_j1979_tags:
+                    this_title = signal.plot_title + " [" + signal.j1979_title + \
+                                 " (PCC:" + str(round(signal.j1979_pcc, 2)) + ")]"
+                else:
+                    this_title = signal.plot_title
+                ax.set_title(this_title,
+                             style='italic',
+                             size='medium')
+                ax.set_xlim([signal.time_series.first_valid_index(), signal.time_series.last_valid_index()])
+                ax.plot(signal.time_series, color='black')
+
+            if not path.exists(cluster_folder):
+                mkdir(cluster_folder)
+            chdir(cluster_folder)
+
+            # If you want transparent backgrounds, a different file format, etc. then change these settings accordingly.
+            if len(list_of_signals) < 100: # prevents errors when given too low a setting for correlation
+                savefig("cluster_" + str(cluster_number) + "." + figure_format,
+                        bbox_iches='tight',
+                        pad_inches=0.0,
+                        dpi=figure_dpi,
+                        format=figure_format,
+                        transparent=figure_transp)
+            else:
+                print("Too many clusters to plot! Skipping...")
+
+            chdir("..")
+
+            plt.close(fig)
+
+            a_timer.set_plot_save_cluster()
+            print("\tComplete...")
+
+    a_timer.set_plot_save_cluster_dict()
+
+
+def plot_signals_by_arb_id(a_timer: PipelineTimer, arb_id_dict: dict, signal_dict: dict, vehicle_number: str,
+                           force: bool=False):
+    if path.exists(arb_id_folder):
+        if force:
+            rmtree(arb_id_folder)
+        else:
+            print("\nArbID plotting appears to have already been done and forcing is turned off. Skipping...")
+            # return
+
+    a_timer.start_function_time()
+
+    for k_id, signals in signal_dict.items():
+        arb_id = arb_id_dict[k_id]
+        if (not arb_id.static and not arb_id.short) or k_id == 155136:
+            print("Plotting Arb ID " + str(k_id) + " (" + str(hex(k_id)) + ") for Vehicle " + vehicle_number)
+            a_timer.start_iteration_time()
+
+            signals_to_plot = []
+            # Don't plot the static signals
+            for k_signal, signal in signals.items():
+                if not signal.static:
+                    signals_to_plot.append(signal)
+            # There's a corner case where the Arb ID only has static signals. This conditional accounts for this.
+            # TODO: This corner case should probably be reflected by arb_id.static.
+            if len(signals_to_plot) < 1:
+                continue
+            # One row per signal plus one for the TANG. Squeeze is used to force axes to be an array to avoid errors.
+            fig, axes = plt.subplots(nrows=1 + len(signals_to_plot), ncols=1)
+            plt.suptitle("Time Series and TANG for Arbitration ID " + hex(k_id) + " from Vehicle " + vehicle_number,
+                         weight='bold',
+                         position=(0.5, 1))
+            fig.set_size_inches(8, (1 + len(signals_to_plot) + 1) * 1.3)
+            # The min() statement provides whitespace for the title depending on the number of subplots.
+            size_adjust = len(signals_to_plot) / 100
+            plt.tight_layout(h_pad=1, rect=(0, 0, 1, min(0.985, 0.93 + size_adjust)))
+            # This adjusts whitespace padding on the left and right of the subplots
+            fig.subplots_adjust(left=0.07, right=0.98)
+            for i, signal in enumerate(signals_to_plot):
+                ax = axes[i]
+                ax.set_title(signal.plot_title,
+                             style='italic',
+                             size='medium')
+                ax.set_xlim([signal.time_series.first_valid_index(), signal.time_series.last_valid_index()])
+                ax.plot(signal.time_series, color='black')
+                # Add a 25% opacity dashed black line to the entropy gradient plot at one boundary of each sub-flow
+                axes[-1].axvline(x=signal.start_index, alpha=0.25, c='black', linestyle='dashed')
+
+            # Plot the entropy gradient at the bottom of the overall output
+            ax = axes[-1]
+            ax.set_title("Min-Max Normalized Transition Aggregation N-Gram (TANG)",
+                         style='italic',
+                         size='medium')
+            tang_bit_width = arb_id.tang.shape[0]
+            ax.set_xlim([-0.01 * tang_bit_width, 1.005 * tang_bit_width])
+            y = arb_id.tang[:]
+            # Differentiate bit positions with non-zero and zero entropy using black points and grey x respectively.
+            ix = isin(y, 0)
+            pad_bit = where(ix)
+            non_pad_bit = where(~ix)
+            ax.scatter(non_pad_bit, y[non_pad_bit], color='black', marker='o', s=10)
+            ax.scatter(pad_bit, y[pad_bit], color='grey', marker='^', s=10)
+
+            if not path.exists(arb_id_folder):
+                mkdir(arb_id_folder)
+            chdir(arb_id_folder)
+
+            # If you want transparent backgrounds, a different file format, etc. then change these settings accordingly.
+            savefig(hex(signal.arb_id) + "." + figure_format,
+                    bbox_iches='tight',
+                    pad_inches=0.0,
+                    dpi=figure_dpi,
+                    format=figure_format,
+                    transparent=figure_transp)
+
+            chdir("..")
+
+            plt.close(fig)
+
+            a_timer.set_plot_save_arb_id()
+            print("\tComplete...")
+
+    a_timer.set_plot_save_arb_id_dict()
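Note that this hunk appends a second def plot_signals_by_arb_id to a module that already defines one (modified above at lines 25 and 85). Python keeps whichever module-level definition executes last, so the appended TANG-plotting variant silently shadows the earlier one for every caller:

```python
# Later module-level definitions win at import time:
def plot(): return "first"
def plot(): return "second"  # rebinding shadows the one above
assert plot() == "second"
```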
@@ -1,4 +1,4 @@
-from pandas import DataFrame, read_csv, Series
+from pandas import DataFrame, read_csv, Series, concat
 from numpy import int64
 from os import path, remove, getcwd
 from pickle import load
@@ -45,7 +45,7 @@ class PreProcessor:
                              header=None,
                              names=['time', 'id', 'dlc', 'b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'],
                              skiprows=7,
-                             delimiter='\t',
+                             delimiter=' ',
                              converters=convert_dict,
                              index_col=0)
 
@@ -70,6 +70,7 @@ class PreProcessor:
                                    time_conversion: int = 1000,
                                    freq_analysis_accuracy: float = 0.0,
                                    freq_synchronous_threshold: float = 0.0,
+                                   given_arb_id: int = 0,
                                    force: bool = False) -> (dict, dict):
         id_dictionary = {}
         j1979_dictionary = {}
@@ -92,6 +93,11 @@ class PreProcessor:
             return id_dictionary, j1979_dictionary
         else:
             self.import_csv(a_timer, self.data_filename)
+            this_id = self.data.loc[self.data['id'] == given_arb_id].copy()
+            this_id.id = given_arb_id * 256
+
+            combined = concat([self.data, this_id])
+            self.data = combined
 
         a_timer.start_function_time()
 
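This is where the known arb ID gets its alias: all frames whose 'id' equals given_arb_id are copied, relabeled as given_arb_id * 256, and appended, so the copy flows through the rest of the pipeline as an extra pseudo arbitration ID. An equivalent standalone sketch (assuming a DataFrame with an 'id' column; attribute assignment to an existing column, as in the committed this_id.id = ..., has the same effect):

```python
from pandas import DataFrame, concat

data = DataFrame({'id': [514, 1042, 514], 'b0': [1, 2, 3]})
known = 514
alias = data.loc[data['id'] == known].copy()
alias['id'] = known * 256      # 131584: relabel the copy as a pseudo ID
data = concat([data, alias])   # original frames plus the aliased copy
```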
@@ -2,7 +2,7 @@ from PreProcessor import PreProcessor
 from Validator import Validator
 from LexicalAnalysis import tokenize_dictionary, generate_signals
 from SemanticAnalysis import generate_correlation_matrix, signal_clustering, j1979_signal_labeling
-from Plotter import plot_j1979, plot_signals_by_arb_id, plot_signals_by_cluster, plot_dendrogram
+from Plotter import plot_j1979, plot_signals_by_arb_id, plot_signals_by_cluster, plot_dendrogram, plot_known_signal_cluster
 from sklearn.preprocessing import minmax_scale
 from typing import Callable
 from PipelineTimer import PipelineTimer
@@ -11,6 +11,8 @@ from pickle import dump, load
 from numpy import ndarray, zeros, float16
 from pandas import DataFrame
+
+from KnownSignalAnalysis import transform_signals, transform_signal
 
 # File names for the on-disc data input and output.
 output_folder: str = 'output'
 pickle_arb_id_filename: str = 'pickleArbIDs.p'
@@ -26,6 +28,8 @@ pickle_combined_df_filename: str = 'pickleCombinedDataFrame.p'
 csv_all_signals_filename: str = 'complete_correlation_matrix.csv'
 pickle_timer_filename: str = 'pickleTimer.p'
+
+pickle_transform_filename: str = 'pickleTransform'
 
 dump_to_pickle: bool = True
 
 # Change out the normalization strategies as needed.
@@ -39,9 +43,11 @@ force_threshold_plotting: bool = False
 force_j1979_plotting: bool = True
 use_j1979: bool = True
+
+force_transform: bool = False
 
 force_lexical_analysis: bool = False
 force_signal_generation: bool = False
-force_arb_id_plotting: bool = True
+force_arb_id_plotting: bool = False
 
 force_correlation_matrix: bool = False
 force_clustering: bool = False
@@ -58,16 +64,15 @@ freq_synchronous_threshold = 0.1
 
 # Threshold parameters used during lexical analysis.
 tokenization_bit_distance: float = 0.2
-tokenize_padding: bool = True
+tokenize_padding: bool = False # changing this to false seems to help better find weak signals
 merge_tokens: bool = True
 
 # Threshold parameters used during semantic analysis
 subset_selection_size: float = 0.25
-max_intra_cluster_distance: float = 0.20
+max_intra_cluster_distance: float = 0.10 # normally 0.25
 min_j1979_correlation: float = 0.85
 # fuzzy_labeling: bool = True
-
 
 # A timer class to record timings throughout the pipeline.
 a_timer = PipelineTimer(verbose=True)
 
@@ -112,7 +117,7 @@ class Sample:
         # Move back to root of './output/make_model_year/sample_index/"
         chdir("../../../")
 
-    def pre_process(self):
+    def pre_process(self, given_arb_id):
         self.make_and_move_to_vehicle_directory()
         pre_processor = PreProcessor(self.path, pickle_arb_id_filename, pickle_j1979_filename, self.use_j1979)
         id_dictionary, j1979_dictionary = pre_processor.generate_arb_id_dictionary(a_timer,
@@ -120,6 +125,7 @@ class Sample:
                                                                                     time_conversion,
                                                                                     freq_analysis_accuracy,
                                                                                     freq_synchronous_threshold,
+                                                                                    given_arb_id,
                                                                                     force_pre_processing)
         if dump_to_pickle:
             if force_pre_processing:
@@ -303,3 +309,37 @@ class Sample:
         plot_dendrogram(a_timer=a_timer, linkage_matrix=linkage_matrix, threshold=self.max_inter_cluster_dist,
                         vehicle_number=vehicle_number, force=force_dendrogram_plotting)
         self.move_back_to_parent_directory()
+
+    def transform_signals(self, id_dictionary: dict):
+        self.make_and_move_to_vehicle_directory()
+        transform_dict = transform_signals(a_timer=a_timer,
+                                           arb_id_dict=id_dictionary,
+                                           transform_pickle_filename=pickle_transform_filename,
+                                           normalize_strategy=signal_normalize_strategy,
+                                           force=force_transform)
+        self.move_back_to_parent_directory()
+        return transform_dict
+
+    def transform_signal(self, id_dictionary: dict, signal_dict: dict, arb_id: int):
+        self.make_and_move_to_vehicle_directory()
+        transform_dict = transform_signal(a_timer=a_timer,
+                                          arb_id_dict=id_dictionary,
+                                          signal_dict=signal_dict,
+                                          transform_pickle_filename=pickle_transform_filename,
+                                          normalize_strategy=signal_normalize_strategy,
+                                          given_arb_id=arb_id,
+                                          force=force_transform)
+        self.move_back_to_parent_directory()
+        return transform_dict
+
+    def plot_known_signal_cluster(self, cluster_dictionary: dict, signal_dictionary: dict, use_j1979_tags: bool,
+                                  known_signal: int, vehicle_number: str):
+        self.make_and_move_to_vehicle_directory()
+        plot_known_signal_cluster(a_timer=a_timer,
+                                  cluster_dict=cluster_dictionary,
+                                  signal_dict=signal_dictionary,
+                                  use_j1979_tags=use_j1979_tags,
+                                  vehicle_number=vehicle_number,
+                                  given_arb_id=known_signal,
+                                  force=force_cluster_plotting)
+        self.move_back_to_parent_directory()
@@ -1,5 +1,5 @@
 from pandas import concat, DataFrame, read_csv
-from numpy import ndarray, zeros
+from numpy import ndarray, zeros, clip
 from os import path, remove
 from pickle import load, dump
 from ast import literal_eval
@@ -77,7 +77,7 @@ def signal_clustering(corr_matrix: DataFrame,
     corr_matrix.where(corr_matrix > 0, 0, inplace=True)
     corr_matrix = 1 - corr_matrix
     X = corr_matrix.values  # type: ndarray
-    Y = ssd.squareform(X)
+    Y = clip(ssd.squareform(X), 0, None)
     # Z is the linkage matrix. This can serve as input to the scipy.cluster.hierarchy.dendrogram method
     Z = linkage(Y, method='single', optimal_ordering=True)
    fclus = fcluster(Z, t=threshold, criterion='distance')
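The clip guards linkage against floating-point noise: after corr_matrix = 1 - corr_matrix, a correlation that computed a hair above 1.0 becomes a tiny negative distance, which scipy's linkage rejects in a condensed distance matrix. A demonstration of the failure mode and the fix:

```python
import numpy as np
import scipy.spatial.distance as ssd
from scipy.cluster.hierarchy import linkage

corr = np.array([[1.0, 1.0 + 1e-15],
                 [1.0 + 1e-15, 1.0]])       # PCC slightly above 1 from rounding
dist = 1 - corr                             # off-diagonals come out ~ -1e-15
Y = np.clip(ssd.squareform(dist), 0, None)  # floor the noise at exactly 0
Z = linkage(Y, method='single', optimal_ordering=True)
```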