# CAN-bus reverse-engineering pipeline driver: pre-processing, lexical
# analysis, semantic analysis, plotting, and on-disc artifact storage.
from os import chdir, mkdir, path, remove
from pickle import dump
from typing import Callable

from sklearn.preprocessing import minmax_scale

from LexicalAnalysis import tokenize_dictionary, generate_signals
from PipelineTimer import PipelineTimer
from Plotter import plot_j1979, plot_signals_by_arb_id, plot_signals_by_cluster
from PreProcessor import PreProcessor
from SemanticAnalysis import subset_selection, subset_correlation, greedy_signal_clustering, label_propagation, \
    j1979_signal_labeling
# File names for the on-disc data input and output.

# Input: raw CAN traffic capture to be analyzed.
can_data_filename: str = 'drive_runway_afit.log'
# can_data_filename: str = 'loggerProgram0.log'

# Output: pickled intermediate results and CSV correlation matrices, written
# into output_folder by the data-storage stage at the bottom of this script.
output_folder: str = 'output'
pickle_arb_id_filename: str = 'pickleArbIDs.p'              # arb ID dictionary
pickle_j1979_filename: str = 'pickleJ1979.p'                # J1979 dictionary
pickle_signal_filename: str = 'pickleSignals.p'             # signal dictionary (lexical analysis)
pickle_subset_filename: str = 'pickleSubset.p'              # selected signal subset
csv_correlation_filename: str = 'subset_correlation_matrix.csv'
pickle_j1979_correlation: str = 'pickleJ1979_correlation.p'
pickle_clusters_filename: str = 'pickleClusters.p'
pickle_all_signal_filename: str = 'pickleAllSignalsDataFrame.p'
csv_all_signals_filename: str = 'complete_correlation_matrix.csv'
pickle_timer_filename: str = 'pickleTimer.p'                # pipeline timing record

# Change out the normalization strategies as needed.
tang_normalize_strategy: Callable = minmax_scale
signal_normalize_strategy: Callable = minmax_scale

# Turn on or off portions of the pipeline and output methods using these flags.
# The force_* flags are threaded through to the corresponding pipeline calls
# below; presumably they force re-computation over any cached pickles -- see
# the respective modules for the exact semantics.
force_pre_processing: bool = False
force_j1979_plotting: bool = False

force_lexical_analysis: bool = False
force_arb_id_plotting: bool = True

force_semantic_analysis: bool = False
force_signal_labeling: bool = False
use_j1979_tags_in_plots: bool = True
force_cluster_plotting: bool = False

# Master switch for the data-storage stage at the end of this script.
dump_to_pickle: bool = True

# Parameters and threshold used for Arb ID transmission frequency analysis during Pre-processing.
time_conversion: int = 1000  # convert seconds to milliseconds
# Confidence level -> standard normal z-score lookup table.
z_lookup: dict = {.8: 1.28, .9: 1.645, .95: 1.96, .98: 2.33, .99: 2.58}
freq_analysis_accuracy: float = z_lookup[0.9]  # z-score for the 0.9 confidence level
freq_synchronous_threshold: float = 0.1

# Threshold parameters used during lexical analysis.
tokenization_bit_distance: float = 0.2
tokenize_padding: bool = True

# Threshold parameters used during semantic analysis
subset_selection_size: float = 0.25
fuzzy_labeling: bool = True
min_correlation_threshold: float = 0.85

# A timer class to record timings throughout the pipeline.
a_timer = PipelineTimer(verbose=True)
# DATA IMPORT AND PRE-PROCESSING #
# Parse the raw CAN capture into an arb ID dictionary and a J1979 dictionary.
pre_processor = PreProcessor(can_data_filename, pickle_arb_id_filename, pickle_j1979_filename)
id_dictionary, j1979_dictionary = pre_processor.generate_arb_id_dictionary(a_timer,
                                                                           tang_normalize_strategy,
                                                                           time_conversion,
                                                                           freq_analysis_accuracy,
                                                                           freq_synchronous_threshold,
                                                                           force_pre_processing)
# Only plot J1979 data when the capture actually contained some.
if j1979_dictionary:
    plot_j1979(a_timer, j1979_dictionary, force_j1979_plotting)
# LEXICAL ANALYSIS #
print("\n\t\t\t##### BEGINNING LEXICAL ANALYSIS #####")
# Tokenize each arb ID's payloads using the configured bit-distance threshold
# and padding/merge options (see LexicalAnalysis for the exact algorithm).
tokenize_dictionary(a_timer,
                    id_dictionary,
                    force_lexical_analysis,
                    include_padding=tokenize_padding,
                    merge=True,
                    max_distance=tokenization_bit_distance)
# Convert the tokenized arb IDs into a normalized signal dictionary; results
# are associated with pickle_signal_filename for the storage stage below.
signal_dictionary = generate_signals(a_timer,
                                     id_dictionary,
                                     pickle_signal_filename,
                                     signal_normalize_strategy,
                                     force_lexical_analysis)
plot_signals_by_arb_id(a_timer, id_dictionary, signal_dictionary, force_arb_id_plotting)
# SEMANTIC ANALYSIS #
print("\n\t\t\t##### BEGINNING SEMANTIC ANALYSIS #####")
# Pick a subset of signals (subset_selection_size fraction) to correlate and
# cluster first; labels are then propagated to the full signal set.
subset_df = subset_selection(a_timer,
                             signal_dictionary,
                             pickle_subset_filename,
                             force_semantic_analysis,
                             subset_size=subset_selection_size)
corr_matrix_subset = subset_correlation(subset_df, csv_correlation_filename, force_semantic_analysis)
# Greedily group subset signals whose correlation clears min_correlation_threshold.
cluster_dict = greedy_signal_clustering(corr_matrix_subset,
                                        correlation_threshold=min_correlation_threshold,
                                        fuzzy_labeling=fuzzy_labeling)
# Extend the subset clustering to all signals; returns the complete signals
# DataFrame, its correlation matrix, and the updated cluster dictionary.
df_full, corr_matrix_full, cluster_dict = label_propagation(a_timer,
                                                            pickle_clusters_filename=pickle_clusters_filename,
                                                            pickle_all_signals_df_filename=pickle_all_signal_filename,
                                                            csv_signals_correlation_filename=csv_all_signals_filename,
                                                            signal_dict=signal_dictionary,
                                                            cluster_dict=cluster_dict,
                                                            correlation_threshold=min_correlation_threshold,
                                                            force=force_semantic_analysis)
# Label signals against the observed J1979 data using the same threshold.
signal_dictionary, j1979_correlations = j1979_signal_labeling(a_timer=a_timer,
                                                              j1979_corr_filename=pickle_j1979_correlation,
                                                              df_signals=df_full,
                                                              j1979_dict=j1979_dictionary,
                                                              signal_dict=signal_dictionary,
                                                              correlation_threshold=min_correlation_threshold,
                                                              force=force_signal_labeling)
plot_signals_by_cluster(a_timer, cluster_dict, signal_dictionary, use_j1979_tags_in_plots, force_cluster_plotting)
# DATA STORAGE #


def _remove_if_present(filename: str) -> None:
    """Delete a previously generated artifact, if any, so it can be rebuilt."""
    if path.isfile(filename):
        remove(filename)


def _pickle_writer(obj) -> Callable:
    """Return a writer callable that pickles *obj* to a given filename.

    BUG FIX: the original used dump(obj, open(filename, "wb")), leaking the
    file handle; the with-statement guarantees the file is flushed and closed.
    """
    def _write(filename: str) -> None:
        with open(filename, "wb") as pickle_file:
            dump(obj, pickle_file)
    return _write


def _dump_if_missing(filename: str, description: str, writer: Callable) -> int:
    """Write one artifact to disc unless it already exists.

    :param filename: target file name (relative to the current directory)
    :param description: human-readable name used in the progress message
    :param writer: callable taking the filename and writing the artifact
    :return: 1 when the artifact was freshly written, 0 when it already
             existed -- so the caller can count fresh artifacts.
    """
    if path.isfile(filename):
        return 0
    print("\nDumping " + description + " to " + filename)
    writer(filename)
    print("\tComplete...")
    return 1


if dump_to_pickle:
    # Clear stale artifacts for every pipeline stage that was forced to re-run.
    # NOTE(review): these removals run in the current working directory while
    # the dumps below happen inside output_folder -- order preserved from the
    # original; confirm the intended directory against the pipeline modules.
    if force_pre_processing:
        _remove_if_present(pickle_arb_id_filename)
        _remove_if_present(pickle_j1979_filename)
    if force_lexical_analysis or force_signal_labeling:
        _remove_if_present(pickle_signal_filename)
    if force_semantic_analysis:
        for stale_filename in (pickle_subset_filename,
                               csv_correlation_filename,
                               pickle_j1979_correlation,
                               pickle_clusters_filename,
                               pickle_all_signal_filename,
                               csv_all_signals_filename):
            _remove_if_present(stale_filename)

    if not path.exists(output_folder):
        mkdir(output_folder)
    chdir(output_folder)

    # Count how many of the nine artifacts are freshly written this run.
    timer_flag = 0
    timer_flag += _dump_if_missing(pickle_arb_id_filename, "arb ID dictionary",
                                   _pickle_writer(id_dictionary))
    timer_flag += _dump_if_missing(pickle_j1979_filename, "J1979 dictionary",
                                   _pickle_writer(j1979_dictionary))
    timer_flag += _dump_if_missing(pickle_signal_filename, "signal dictionary",
                                   _pickle_writer(signal_dictionary))
    timer_flag += _dump_if_missing(pickle_subset_filename, "signal subset list",
                                   _pickle_writer(subset_df))
    timer_flag += _dump_if_missing(csv_correlation_filename, "subset correlation matrix",
                                   corr_matrix_subset.to_csv)
    timer_flag += _dump_if_missing(pickle_j1979_correlation, "J1979 correlation DataFrame",
                                   _pickle_writer(j1979_correlations))
    timer_flag += _dump_if_missing(pickle_clusters_filename, "cluster dictionary",
                                   _pickle_writer(cluster_dict))
    timer_flag += _dump_if_missing(pickle_all_signal_filename, "complete signals DataFrame",
                                   _pickle_writer(df_full))
    timer_flag += _dump_if_missing(csv_all_signals_filename, "complete correlation matrix",
                                   corr_matrix_full.to_csv)

    # Persist the timer only when every artifact above was produced by this
    # run, so the recorded timings reflect a complete end-to-end execution.
    # BUG FIX: the original tested 'timer_flag is 9', which relies on
    # CPython's small-integer caching; '==' is the correct value comparison.
    if timer_flag == 9:
        print("\nDumping pipeline timer to " + pickle_timer_filename)
        with open(pickle_timer_filename, "wb") as pickle_file:
            dump(a_timer, pickle_file)
        print("\tComplete...")
    chdir("..")