1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
use std::io::{self, BufRead};
use log::warn;
use crate::collapse::common::Occurrences;
use crate::collapse::Collapse;
// The leading whitespace-separated words of the header row that opens the call graph table.
// The `ticks` and `bytes` columns are optional, so they are deliberately not listed here.
static START_LINE: &[&str] = &[
"COST", "CENTRE", "MODULE", "SRC", "no.", "entries", "%time", "%alloc", "%time", "%alloc",
];
/// `ghcprof` folder configuration options.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct Options {
/// Column to read the cost of each stack from; the default is [`Source::PercentTime`].
pub source: Source,
}
/// Which prof column supplies the cost recorded for each output stack.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub enum Source {
/// The individual `%time` column: individual time as a percentage of the total.
#[default]
PercentTime,
/// The `ticks` column: individual runtime ticks.
Ticks,
/// The `bytes` column: individual bytes allocated.
Bytes,
}
/// A stack collapser for the output of `ghc`'s prof files.
///
/// To construct one, either use `ghcprof::Folder::default()` or create an [`Options`] and use
/// `ghcprof::Folder::from(options)`.
#[derive(Clone, Default)]
pub struct Folder {
/// Cost parsed from the most recently processed call graph row.
current_cost: usize,
/// `module.function` frames from the root down to the most recently processed row.
stack: Vec<String>,
/// Configuration selecting which column the costs are read from.
opt: Options,
}
// The starting character offsets of the columns that are read from every call graph row.
// They are worked out once from the header row.
#[derive(Debug)]
struct Cols {
// Start of the cost centre (function name) column.
cost_centre: usize,
// Start of the module column.
module: usize,
// Start of the column the cost is read from.
source: usize,
}
impl Collapse for Folder {
    /// Folds the call graph table of a `ghc` prof file read from `reader` into collapsed stacks
    /// written to `writer`.
    ///
    /// Everything up to and including the header row of the table is skipped, as is the single
    /// line that follows it. Rows are then consumed until the first blank line or the end of
    /// the input. If the header row never appears, a warning is logged and nothing is written.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised by `reader` or `writer`, and an error when the column
    /// requested through the folder's options is absent from the header or a row cannot be
    /// parsed.
    fn collapse<R, W>(&mut self, mut reader: R, writer: W) -> io::Result<()>
    where
        R: io::BufRead,
        W: io::Write,
    {
        // A previous call that bailed out with an error never reached the reset at the bottom
        // of this function, so drop whatever frames it left behind before parsing anything.
        self.current_cost = 0;
        self.stack.clear();

        // Consume the header...
        let mut line = Vec::new();
        let cols = loop {
            line.clear();
            if reader.read_until(b'\n', &mut line)? == 0 {
                warn!("File ended before start of call graph");
                return Ok(());
            }
            let l = String::from_utf8_lossy(&line);
            let is_header = l
                .split_whitespace()
                .take(START_LINE.len())
                .eq(START_LINE.iter().copied());
            if !is_header {
                continue;
            }

            let module = l.find("MODULE").unwrap_or(0);
            // Pick out these fixed columns, first two are individual only
            // "%time %alloc %time %alloc"
            // `ticks` and `bytes` columns are optional and might appear on the end
            // ticks header is right aligned
            // bytes header is right aligned
            // - BUT it has a max width of 9 whilst its values can exceed
            //   (but are always space separated)
            // "%time %alloc %time %alloc ticks bytes"
            let source = match self.opt.source {
                Source::PercentTime => l
                    .find("%time")
                    .expect("%time is present from matching START_LINE"),
                // Right-aligned columns are located from the end of the column before them.
                Source::Ticks => one_off_end_of_col_before(l.as_ref(), "ticks")?,
                Source::Bytes => one_off_end_of_col_before(l.as_ref(), "bytes")?,
            };
            break Cols {
                cost_centre: 0,
                module,
                source,
            };
        };

        // Skip one line
        reader.read_until(b'\n', &mut line)?;

        // Process the data...
        let mut occurrences = Occurrences::new(1);
        loop {
            line.clear();
            if reader.read_until(b'\n', &mut line)? == 0 {
                break;
            }
            let l = String::from_utf8_lossy(&line);
            let row = l.trim_end();
            if row.is_empty() {
                // The format is not expected to contain any blank lines within the callgraph
                break;
            }
            self.on_line(row, &mut occurrences, &cols)?;
        }

        // Write the results...
        occurrences.write_and_clear(writer)?;

        // Reset the state...
        self.current_cost = 0;
        self.stack.clear();
        Ok(())
    }

    /// Check for start line of a call graph.
    ///
    /// Returns `Some(true)` as soon as a line of `input` begins with the header words,
    /// `Some(false)` if `input` cannot be read, and `None` when the whole of `input` was
    /// scanned without finding the header.
    fn is_applicable(&mut self, input: &str) -> Option<bool> {
        let mut input = input.as_bytes();
        let mut line = String::new();
        loop {
            line.clear();
            match input.read_line(&mut line) {
                Ok(0) => return None,
                Ok(_) => {}
                Err(_) => return Some(false),
            }
            let is_header = line
                .split_whitespace()
                .take(START_LINE.len())
                .eq(START_LINE.iter().copied());
            if is_header {
                return Some(true);
            }
        }
    }
}
// Locates the header `col` in `line` and returns the offset one past the last non-whitespace
// character that precedes it, i.e. the position immediately after the previous column title.
//
// Fails when `col` does not occur in `line`, or when nothing but whitespace comes before it.
fn one_off_end_of_col_before(line: &str, col: &str) -> io::Result<usize> {
    match line.find(col) {
        None => invalid_data_error!("Expected '{col}' column but it was not present"),
        Some(title_start) => {
            let preceding = &line[..title_start];
            match preceding.rfind(|c: char| !c.is_whitespace()) {
                None => {
                    invalid_data_error!("Expected a column before '{col}' but there was none")
                }
                Some(last_non_blank) => Ok(last_non_blank + 1),
            }
        }
    }
}
impl From<Options> for Folder {
fn from(opt: Options) -> Self {
Folder {
opt,
..Default::default()
}
}
}
impl Folder {
    // Processes one row of the call graph table.
    //
    // The number of leading spaces is the depth of the row: a row nested one level deeper than
    // the previous one is its child, a shallower row closes the frames below it, e.g.
    //
    // MAIN            MAIN                       ...
    //  CAF            Options.Applicative.Builder ...
    //   defaultPrefs  Options.Applicative.Builder ...
    //    idm          Options.Applicative.Builder ...
    //   fullDesc      Options.Applicative.Builder ...
    //
    // Each row pushes a `module.function` frame and records the joined stack together with the
    // cost read from the configured column. Rows consisting only of spaces are ignored.
    fn on_line(
        &mut self,
        line: &str,
        occurrences: &mut Occurrences,
        cols: &Cols,
    ) -> io::Result<()> {
        // Extracts the whitespace-delimited value found at or after character `col_start`.
        // There can be non-ascii names, so this walks chars rather than slicing by byte offset.
        fn field_at(line: &str, col_start: usize) -> String {
            line.chars()
                .skip(col_start)
                .skip_while(|c| c.is_whitespace())
                // functions, modules and costs never contain whitespace themselves
                .take_while(|c| !c.is_whitespace())
                .collect()
        }

        let depth = match line.find(|c: char| c != ' ') {
            Some(depth) => depth,
            None => return Ok(()),
        };

        // A row may sit at most one level below the frame currently on top of the stack.
        if depth > self.stack.len() {
            return invalid_data_error!("Skipped indentation level at line:\n{}", line);
        }
        // Drop the frames that are not ancestors of this row (no-op for a direct child).
        self.stack.truncate(depth);

        let raw_cost = field_at(line, cols.source);
        let value = match raw_cost.parse::<f64>() {
            Ok(value) => value,
            Err(_) => return invalid_data_error!("Invalid cost field: \"{}\"", raw_cost),
        };

        // The columns costs are extracted from all exclude the cost of their children.
        let scaled = match self.opt.source {
            // Counts are stored as `usize`, so convert to per-mille to not lose the 1dp
            Source::PercentTime => value * 10.0,
            Source::Ticks | Source::Bytes => value,
        };
        self.current_cost = scaled as usize;

        let func = field_at(line, cols.cost_centre);
        let module = field_at(line, cols.module);
        self.stack.push(format!("{}.{}", module, func));

        // identical stacks from other threads can appear so need to insert or add
        occurrences.insert_or_add(self.stack.join(";"), self.current_cost);
        Ok(())
    }
}