rbpf-cli: add coverage flag

- adds DWARF parser using gimli
- adds coverage module to map an execution trace to source line numbers
- adds a gcov module to export a coverage profile to gcov intermediate JSON format
- adds basic CLI logger
This commit is contained in:
Richard Patel 2022-02-23 00:59:50 +01:00 committed by Dmitri Makarov
parent d63a4fa174
commit 506ee0014a
6 changed files with 813 additions and 474 deletions

907
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -10,7 +10,12 @@ edition = "2021"
publish = false
[dependencies]
bv = "0.11.1"
clap = { version = "3.1.5", features = ["cargo"] }
gimli = "0.26.1"
goblin = { version = "0.5.1", features = ["std"] }
itertools = "0.10.3"
log = { version = "0.4.14", features = ["std"] }
serde = "1.0.136"
serde_json = "1.0.79"
solana-bpf-loader-program = { path = "../programs/bpf_loader", version = "=1.11.0" }

257
rbpf-cli/src/coverage.rs Normal file
View File

@ -0,0 +1,257 @@
use {
crate::gcov::{GcovFile, GcovIntermediate, GcovLine},
bv::{BitVec, BitsMut},
gimli::{Dwarf, EndianSlice, LineProgramHeader, RunTimeEndian, Unit, UnitHeader},
goblin::elf::Elf,
itertools::Itertools,
log::*,
std::{
borrow::Cow,
collections::{BTreeSet, HashMap},
fmt::{Debug, Formatter},
path::{Path, PathBuf},
},
};
/// Executed source positions collected for a single source file.
#[derive(Default)]
pub(crate) struct FileCoverage {
    /// Path of the source file, if it could be determined.
    file_path: Option<PathBuf>,
    /// Set of executed `(line, column)` pairs, kept in sorted order.
    hits: BTreeSet<(u64, u64)>,
}

impl FileCoverage {
    /// Creates an empty coverage record for the given (optional) source path.
    pub(crate) fn new(file_path: Option<PathBuf>) -> Self {
        Self {
            file_path,
            hits: BTreeSet::new(),
        }
    }
}
/// Source-level coverage derived from an instruction trace and DWARF line info.
///
/// Executed positions are grouped by resolved source path. DWARF file indices
/// are only meaningful within a single line program, so they cannot be used
/// to identify a file across compilation units.
#[derive(Default)]
pub(crate) struct Coverage {
    /// Per-file coverage keyed by the resolved source path. `None` collects
    /// positions whose path could not be read from the debug info.
    hits: HashMap<Option<PathBuf>, FileCoverage>,
}

impl Coverage {
    /// Builds a coverage profile from an execution trace.
    ///
    /// * `elf_bytes` - raw bytes of the ELF file; debug sections are sliced
    ///   out of this buffer.
    /// * `elf` - the parsed view of `elf_bytes`.
    /// * `trace` - one entry per executed instruction; element 11 of each
    ///   entry is used as the index of the instruction within `.text`
    ///   (presumably the program counter -- confirm against the tracer).
    ///
    /// # Errors
    ///
    /// Fails if the ELF has no usable `.text` section or if the DWARF
    /// sections or unit headers cannot be read. Failures inside a single
    /// compilation unit are logged and do not abort the whole profile.
    pub(crate) fn from_trace(
        elf_bytes: &[u8],
        elf: &Elf<'_>,
        trace: &[[u64; 12]],
    ) -> Result<Self, Box<dyn std::error::Error>> {
        // Find text section.
        let text_section = elf
            .section_headers
            .iter()
            .find(|section| elf.shdr_strtab.get_at(section.sh_name) == Some(".text"))
            .ok_or("missing .text section")?;
        let text_range = text_section
            .file_range()
            .ok_or("invalid .text range")?;

        // Create bitmap of executed instructions (one bit per 8-byte slot).
        let ins_count = (text_range.len() / 8) as u64;
        let mut hits = BitVec::<usize>::new_fill(false, ins_count);
        // `set_bit` panics on an out-of-range position, so such entries are
        // skipped and reported once instead of aborting the tool.
        let mut out_of_range = 0u64;
        for ins in trace {
            let ins_index = ins[11];
            if ins_index < ins_count {
                hits.set_bit(ins_index, true);
            } else {
                out_of_range += 1;
            }
        }
        if out_of_range > 0 {
            warn!(
                "Ignored {} trace entries outside of the .text section",
                out_of_range
            );
        }

        // Teach gimli how to load a section from goblin.
        let load_section = |id: gimli::SectionId| -> Result<Cow<[u8]>, gimli::Error> {
            let file_range = elf
                .section_headers
                .iter()
                .find(|section| {
                    let section_name = elf.shdr_strtab.get_at(section.sh_name);
                    section_name == Some(id.name())
                })
                .and_then(|section| section.file_range());
            Ok(match file_range {
                None => {
                    debug!("Section {} not found", id.name());
                    Cow::Borrowed(&[][..])
                }
                Some(file_range) => {
                    let section_bytes = &elf_bytes[file_range.start..file_range.end];
                    debug!("Section {}: {} bytes", id.name(), section_bytes.len());
                    Cow::Borrowed(section_bytes)
                }
            })
        };

        // Wrap each loaded section in a reader; sections are always read as
        // little-endian here.
        let borrow_section: &dyn for<'a> Fn(
            &'a Cow<[u8]>,
        )
            -> gimli::EndianSlice<'a, gimli::RunTimeEndian> =
            &|section| gimli::EndianSlice::new(&*section, gimli::RunTimeEndian::Little);

        // Load all of the sections.
        let dwarf_cow = Dwarf::load(&load_section)?;
        // Create `EndianSlice`s for all of the sections.
        let dwarf = dwarf_cow.borrow(&borrow_section);

        let mut cov = Self::default();
        // Iterate over the compilation units. Line-program addresses are
        // virtual addresses, so they are rebased against the section's
        // virtual address rather than its offset in the file.
        let mut iter = dwarf.units();
        while let Some(header) = iter.next()? {
            if let Err(e) = cov.process_unit(&dwarf, header, text_section.sh_addr, &hits) {
                error!("Failed to extract coverage from compile unit: {:?}", e);
            }
        }
        Ok(cov)
    }

    /// Records every executed `(line, column)` of one compilation unit.
    ///
    /// `text_section_addr` is the address that corresponds to bit 0 of
    /// `hits`; each further bit covers the next 8 bytes. Rows whose address
    /// falls outside of the bitmap are treated as not executed.
    fn process_unit(
        &mut self,
        dwarf: &Dwarf<EndianSlice<'_, RunTimeEndian>>,
        header: UnitHeader<EndianSlice<'_, RunTimeEndian>, usize>,
        text_section_addr: u64,
        hits: &bv::BitVec,
    ) -> Result<(), Box<dyn std::error::Error>> {
        // Purely informational, so never panic over a missing offset.
        if let Some(offset) = header.offset().as_debug_info_offset() {
            debug!(
                "Line number info for unit at <.debug_info+0x{:x}>",
                offset.0
            );
        }
        let unit = dwarf.unit(header)?;

        // Get the line program for the compilation unit.
        let program = match unit.line_program.clone() {
            None => return Ok(()),
            Some(program) => program,
        };
        let comp_dir = if let Some(ref dir) = unit.comp_dir {
            PathBuf::from(dir.to_string_lossy().into_owned())
        } else {
            PathBuf::new()
        };

        // Iterate over the line program rows.
        let mut rows = program.rows();
        while let Some((header, row)) = rows.next_row()? {
            if row.end_sequence() {
                warn!(
                    "Possible gap in addresses: {:x} end-sequence",
                    row.address()
                );
                continue;
            }

            // Determine line/column. A real DWARF line/column is never 0, so
            // 0 is used to mean "unknown" / "left edge".
            let line = match row.line() {
                Some(line) => line.get(),
                None => 0,
            };
            let column = match row.column() {
                gimli::ColumnType::LeftEdge => 0,
                gimli::ColumnType::Column(column) => column.get(),
            };

            // Indexing the bitmap out of range panics, so check the length
            // before looking the instruction up.
            let executed = row
                .address()
                .checked_sub(text_section_addr)
                .map(|offset| offset / 8)
                .map_or(false, |ins_index| {
                    ins_index < hits.len() && hits[ins_index]
                });
            if executed {
                self.file_coverage(&comp_dir, dwarf, &unit, row, header)
                    .hits
                    .insert((line, column));
            }
        }
        Ok(())
    }

    /// Returns the coverage record of the source file that `row` belongs to,
    /// creating it on first use.
    ///
    /// The path is resolved for every call because the row's file index is
    /// local to `header`; the resolved path is what identifies the record.
    /// If the path cannot be read, the record keyed by `None` is returned.
    fn file_coverage(
        &mut self,
        comp_dir: &Path,
        dwarf: &Dwarf<EndianSlice<'_, RunTimeEndian>>,
        unit: &Unit<EndianSlice<'_, RunTimeEndian>>,
        row: &gimli::LineRow,
        header: &LineProgramHeader<EndianSlice<'_, RunTimeEndian>, usize>,
    ) -> &mut FileCoverage {
        // Read path from ELF.
        let file_path = row.file(header).and_then(|file| {
            let mut path = PathBuf::from(comp_dir);
            // The directory index 0 is defined to correspond to the compilation unit directory.
            if file.directory_index() != 0 {
                if let Some(dir) = file.directory(header) {
                    path.push(
                        dwarf
                            .attr_string(unit, dir)
                            .ok()?
                            .to_string_lossy()
                            .as_ref(),
                    );
                }
            }
            path.push(
                dwarf
                    .attr_string(unit, file.path_name())
                    .ok()?
                    .to_string_lossy()
                    .as_ref(),
            );
            Some(path)
        });

        // Look up by path, creating a new record if this file is new.
        self.hits
            .entry(file_path.clone())
            .or_insert_with(|| FileCoverage::new(file_path))
    }
}
impl Debug for Coverage {
    /// Writes one `file=... line=... col=...` line per executed position.
    /// Records without a known source path are left out.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let with_path = self.hits.values().filter_map(|file_cov| {
            file_cov
                .file_path
                .as_ref()
                .map(|path| (path, &file_cov.hits))
        });
        for (path, positions) in with_path {
            for (line, column) in positions {
                writeln!(f, "file={:?} line={} col={}", path, line, column)?;
            }
        }
        Ok(())
    }
}
impl From<&Coverage> for GcovIntermediate {
fn from(cov: &Coverage) -> Self {
GcovIntermediate {
files: cov.hits.values().map(|file| file.into()).collect(),
}
}
}
impl From<&FileCoverage> for GcovFile {
    /// Converts one file's executed positions into gcov line records.
    ///
    /// `hits` is ordered by `(line, column)`, so positions on the same line
    /// are adjacent and can be grouped in a single pass. A missing path is
    /// exported as an empty string.
    fn from(cov: &FileCoverage) -> Self {
        let file = match cov.file_path.as_ref() {
            Some(path) => path.to_string_lossy().into_owned(),
            None => String::new(),
        };

        let by_line = cov.hits.iter().group_by(|(line, _)| *line);
        let mut lines = Vec::new();
        for (line_number, columns) in &by_line {
            lines.push(GcovLine {
                line_number,
                count: columns.count(), // TODO count actual hits here, not cols
            });
        }

        GcovFile { file, lines }
    }
}

21
rbpf-cli/src/gcov.rs Normal file
View File

@ -0,0 +1,21 @@
use serde::Serialize;
/// Gcov JSON intermediate format.
///
/// Documented in [man gcov.1](https://man7.org/linux/man-pages/man1/gcov.1.html)
///
/// Only the fields declared on these types are serialized.
#[derive(Serialize)]
pub struct GcovIntermediate {
/// One record per source file, serialized under the `files` key.
pub files: Vec<GcovFile>,
}
/// Coverage record for a single source file.
#[derive(Serialize)]
pub struct GcovFile {
/// Name of the source file, serialized under the `file` key.
pub file: String,
/// Per-line records of this file, serialized under the `lines` key.
pub lines: Vec<GcovLine>,
}
/// Coverage record for a single source line.
#[derive(Serialize)]
pub struct GcovLine {
/// Line number within the source file.
pub line_number: u64,
/// Count reported for the line.
/// NOTE(review): the gcov format presumably expects an execution count
/// here -- confirm that producers of this struct store one.
pub count: usize,
}

32
rbpf-cli/src/logger.rs Normal file
View File

@ -0,0 +1,32 @@
use log::{Level, LevelFilter, Log, Metadata, Record};
/// Minimal `log` backend for the CLI.
#[derive(Default)]
pub(crate) struct Logger {
    /// When set, debug-level records are emitted in addition to info and above.
    pub(crate) verbose: bool,
}

impl Logger {
    /// Creates the logger and raises the global `log` level cap to `Debug`.
    ///
    /// The cap is the same for both modes; the per-record decision is made
    /// from `verbose` when a record is checked.
    pub(crate) fn new(verbose: bool) -> Self {
        let logger = Logger { verbose };
        log::set_max_level(LevelFilter::Debug);
        logger
    }
}
impl Log for Logger {
    /// A record passes if its level is at most `Debug` (verbose) or `Info`.
    fn enabled(&self, metadata: &Metadata) -> bool {
        let threshold = match self.verbose {
            true => Level::Debug,
            false => Level::Info,
        };
        metadata.level() <= threshold
    }

    /// Prints the formatted message of an enabled record to stderr.
    fn log(&self, record: &Record) {
        if !self.enabled(record.metadata()) {
            return;
        }
        eprintln!("{}", record.args());
    }

    /// Nothing to flush: every record is written immediately.
    fn flush(&self) {}
}

View File

@ -1,5 +1,12 @@
mod coverage;
mod gcov;
mod logger;
use {
crate::{coverage::Coverage, gcov::GcovIntermediate, logger::Logger},
clap::{crate_version, Arg, Command},
goblin::elf::Elf,
log::*,
serde::{Deserialize, Serialize},
serde_json::Result,
solana_bpf_loader_program::{
@ -44,10 +51,10 @@ struct Input {
fn load_accounts(path: &Path) -> Result<Input> {
let file = File::open(path).unwrap();
let input: Input = serde_json::from_reader(file)?;
eprintln!("Program input:");
eprintln!("accounts {:?}", &input.accounts);
eprintln!("instruction_data {:?}", &input.instruction_data);
eprintln!("----------------------------------------");
info!("Program input:");
info!("accounts {:?}", &input.accounts);
info!("instruction_data {:?}", &input.instruction_data);
info!("----------------------------------------");
Ok(input)
}
@ -146,6 +153,11 @@ native machine code before execting it in the virtual machine.",
.short('t')
.long("trace"),
)
.arg(
Arg::new("verbose")
.help("Show additional information")
.long("verbose"),
)
.arg(
Arg::new("profile")
.help("Output profile to 'profile.dot' file using tracing instrumentation")
@ -167,10 +179,19 @@ native machine code before execting it in the virtual machine.",
.takes_value(true)
.possible_values(&["json", "json-compact"]),
)
.arg(
Arg::new("coverage")
.help("Output coverage profile")
.short('c')
.long("coverage"),
)
.get_matches();
log::set_boxed_logger(Box::new(Logger::new(matches.is_present("verbose")))).unwrap();
let config = Config {
enable_instruction_tracing: matches.is_present("trace") || matches.is_present("profile"),
enable_instruction_tracing: matches.is_present("trace")
|| matches.is_present("profile")
|| matches.is_present("coverage"),
enable_symbol_and_section_labels: true,
..Config::default()
};
@ -322,21 +343,21 @@ native machine code before execting it in the virtual machine.",
println!("{}", serde_json::to_string(&output).unwrap());
}
_ => {
println!("Program output:");
println!("{:?}", output);
info!("Program output:");
info!("{:?}", output);
}
}
if matches.is_present("trace") {
eprintln!("Trace is saved in trace.out");
info!("Trace is saved in trace.out");
let mut file = File::create("trace.out").unwrap();
vm.get_tracer()
.write(&mut file, analysis.analyze())
.unwrap();
}
if matches.is_present("profile") {
eprintln!("Profile is saved in profile.dot");
let tracer = &vm.get_tracer();
info!("Profile is saved in profile.dot");
let tracer = vm.get_tracer();
let analysis = analysis.analyze();
let dynamic_analysis = DynamicAnalysis::new(tracer, analysis);
let mut file = File::create("profile.dot").unwrap();
@ -344,6 +365,30 @@ native machine code before execting it in the virtual machine.",
.visualize_graphically(&mut file, Some(&dynamic_analysis))
.unwrap();
}
if matches.is_present("coverage") {
// Read ELF to buffer.
let elf_size = file.seek(SeekFrom::End(0)).unwrap();
file.seek(SeekFrom::Start(0)).unwrap();
let mut elf_bytes = Vec::<u8>::with_capacity(elf_size as usize);
file.read_to_end(&mut elf_bytes)
.expect("failed to read ELF");
// Parse ELF.
let elf = Elf::parse(&elf_bytes).expect("invalid ELF");
// Get register state trace.
let tracer = vm.get_tracer();
// Create coverage profile.
match Coverage::from_trace(&elf_bytes, &elf, &tracer.log) {
Err(err) => error!("Failed to build coverage profile: {}", err),
Ok(cov) => {
info!("Code coverage is saved in coverage.json");
trace!("{:?}", cov);
let mut file = File::create("coverage.json").unwrap();
let gcov: GcovIntermediate = (&cov).into();
serde_json::to_writer_pretty(&mut file, &gcov)
.expect("Failed to write coverage JSON");
}
}
}
}
#[derive(Serialize)]