/**
 * Copyright: Copyright Jason White, 2016
 * License:   MIT
 * Authors:   Jason White
 *
 * Description:
 * The tracer traces system calls to determine inputs and outputs. This is very
 * slow and should only be used as a last resort when there are no other
 * suitable handlers.
 *
 * FIXME: Implement this using ptrace directly. This will eliminate the
 * dependency on strace, as it is not installed by default. It will also
 * eliminate the small amount of overhead of spawning an extra process.
 */
module button.handlers.tracer.strace;

version (linux):

import button.resource;
import button.context;

import io.file;

/**
 * Accumulates the implicit inputs and outputs discovered by reading an strace
 * log line by line.
 *
 * Each log line is expected to start with the process ID (as produced by
 * `strace -f -o <file>`), followed by the system call and its arguments.
 */
private struct Trace
{
    private
    {
        import std.regex : regex;

        // One pattern per traced system call. Capture group 1 is always the
        // (first) path argument.
        static re_open   = regex(`open\("([^"]*)", ([^,)]*)`);
        static re_creat  = regex(`creat\("([^"]*)",`);
        static re_rename = regex(`rename\("([^"]*)", "([^"]*)"\)`);
        static re_mkdir  = regex(`mkdir\("([^"]*)", (0[0-7]*)\)`);
        static re_chdir  = regex(`chdir\("([^"]*)"\)`);
    }

    /**
     * Paths that start with these fragments are ignored.
     */
    private static immutable ignoredPaths = [
        "/dev/",
        "/etc/",
        "/proc/",
        "/tmp/",
        "/usr/",
    ];

    /**
     * Returns: True if the given path should be ignored, false otherwise.
     *
     * Note that the path is compared as given in the trace log; it is not
     * resolved against the working directory of the process first.
     */
    private static bool ignorePath(const(char)[] path) pure nothrow
    {
        import std.algorithm.searching : startsWith;

        foreach (ignored; ignoredPaths)
        {
            if (path.startsWith(ignored))
                return true;
        }

        return false;
    }

    private
    {
        import std.container.rbtree;

        // Current working directories of each tracked process.
        string[int] processes;

        RedBlackTree!string inputs, outputs;
    }

    /**
     * Appends the collected inputs and outputs to the given resource sets.
     */
    void dump(ref Resources implicitInputs, ref Resources implicitOutputs)
    {
        implicitInputs.put(inputs[]);
        implicitOutputs.put(outputs[]);
    }

    /**
     * Returns: The normalized form of `path`. If a working directory has been
     * recorded for `pid`, the path is resolved against it first.
     */
    string filePath(int pid, const(char)[] path)
    {
        import std.path : buildNormalizedPath;

        if (auto p = pid in processes)
            return buildNormalizedPath(*p, path);

        return buildNormalizedPath(path);
    }

    /**
     * Reads a trace log and records every recognized system call.
     *
     * Any previously collected inputs and outputs are discarded. Lines that do
     * not start with a process ID, or whose arguments do not match the
     * expected pattern for the system call, are skipped.
     */
    void parse(File f)
    {
        import io.text;
        import std.conv : parse, ConvException;
        import std.string : stripLeft;
        import std.algorithm.searching : startsWith;
        import std.regex : matchFirst;

        inputs = redBlackTree!string();
        outputs = redBlackTree!string();

        foreach (line; f.byLine)
        {
            int pid;

            // std.conv.parse consumes the leading digits from `line`, leaving
            // the rest of the line for the matching below.
            try
                pid = line.parse!int();
            catch (ConvException e)
                continue;

            line = line.stripLeft(" \t");

            if (line.startsWith("open"))
            {
                auto captures = line.matchFirst(re_open);
                if (captures.empty)
                    continue;

                open(pid, captures[1], captures[2]);
            }
            else if (line.startsWith("creat"))
            {
                // Must use the creat pattern here; the open pattern can never
                // match a creat line and would silently drop the output.
                auto captures = line.matchFirst(re_creat);
                if (captures.empty)
                    continue;

                creat(pid, captures[1]);
            }
            else if (line.startsWith("rename"))
            {
                auto captures = line.matchFirst(re_rename);
                if (captures.empty)
                    continue;

                rename(pid, captures[1], captures[2]);
            }
            else if (line.startsWith("mkdir"))
            {
                auto captures = line.matchFirst(re_mkdir);
                if (captures.empty)
                    continue;

                mkdir(pid, captures[1]);
            }
            else if (line.startsWith("chdir"))
            {
                auto captures = line.matchFirst(re_chdir);
                if (captures.empty)
                    continue;

                chdir(pid, captures[1]);
            }
        }
    }

    /**
     * Records an `open` call.
     *
     * Params:
     *   pid   = ID of the process that made the call.
     *   path  = Path argument as it appears in the trace log.
     *   flags = Flags argument as it appears in the trace log, with individual
     *           flags separated by `|`.
     */
    void open(int pid, const(char)[] path, const(char)[] flags)
    {
        import std.algorithm.iteration : splitter;

        if (ignorePath(path))
            return;

        foreach (flag; splitter(flags, '|'))
        {
            if (flag == "O_WRONLY" || flag == "O_RDWR")
            {
                // Opened in write mode. It's an output even if it was read
                // before.
                auto f = filePath(pid, path);
                inputs.removeKey(f);
                outputs.insert(f);
                break;
            }
            else if (flag == "O_RDONLY")
            {
                // Opened in read-only mode. It's an input unless it's already
                // an output. Consider the scenario of writing a new file and
                // then reading it back in. In such cases, the file should only
                // be considered an output.
                auto f = filePath(pid, path);
                if (f !in outputs)
                    inputs.insert(f);
                break;
            }
        }
    }

    /**
     * Records a `creat` call. The created file is an output.
     */
    void creat(int pid, const(char)[] path)
    {
        if (ignorePath(path))
            return;

        outputs.insert(filePath(pid, path));
    }

    /**
     * Records a `rename` call. The destination becomes an output and the
     * source is no longer considered one.
     */
    void rename(int pid, const(char)[] from, const(char)[] to)
    {
        if (ignorePath(to))
            return;

        auto output = filePath(pid, to);
        outputs.removeKey(filePath(pid, from));
        inputs.removeKey(output);
        outputs.insert(output);
    }

    /**
     * Records a `mkdir` call. The created directory is an output.
     */
    void mkdir(int pid, const(char)[] dir)
    {
        // Apply the same filter as the other recorders so that directories
        // created under ignored locations are not reported.
        if (ignorePath(dir))
            return;

        outputs.insert(filePath(pid, dir));
    }

    /**
     * Records a `chdir` call by updating the working directory tracked for
     * the process.
     */
    void chdir(int pid, const(char)[] path)
    {
        // Resolve against the directory already tracked for this process so
        // that a relative chdir extends it instead of replacing it. An
        // absolute path still replaces it, since an absolute segment drops
        // the preceding ones when the path is built.
        processes[pid] = filePath(pid, path);
    }
}

/**
 * Runs a command under strace and adds the files it touched to the given
 * inputs and outputs.
 *
 * The command itself is run by the base handler; this function only prefixes
 * the arguments with the strace invocation and parses the resulting log.
 *
 * Params:
 *   ctx     = Build context, passed through to the base handler.
 *   args    = Command line to run.
 *   workDir = Working directory, passed through to the base handler.
 *   inputs  = Receives the implicit inputs found in the trace.
 *   outputs = Receives the implicit outputs found in the trace.
 */
void execute(
        ref BuildContext ctx,
        const(string)[] args,
        string workDir,
        ref Resources inputs,
        ref Resources outputs
        )
{
    import button.handlers.base : base = execute;

    import std.file : remove;

    // The log must outlive the temporary file handle so that strace can write
    // to it by name; it is removed explicitly when this function exits.
    auto traceLog = tempFile(AutoDelete.no).path;
    scope (exit) remove(traceLog);

    auto traceArgs = [
        "strace",

        // Follow child processes
        "-f",

        // Output to a file to avoid mixing the child's output
        "-o", traceLog,

        // Only trace the sys calls we are interested in
        "-e", "trace=open,creat,rename,mkdir,chdir",
    ] ~ args;

    base(ctx, traceArgs, workDir, inputs, outputs);

    // Parse the trace log to determine dependencies
    auto strace = Trace();
    strace.parse(File(traceLog));
    strace.dump(inputs, outputs);
}