1 /**
2  * Copyright: Copyright Jason White, 2016
3  * License:   MIT
4  * Authors:   Jason White
5  *
6  * Description:
7  * The tracer traces system calls to determine inputs and outputs. This is very
8  * slow and should only be used as a last resort when there are no other
9  * suitable handlers.
10  *
11  * FIXME: Implement this using ptrace directly. This will eliminate the
12  * dependency on strace, as it is not installed by default. It will also
13  * eliminate the small amount of overhead of spawning an extra process.
14  */
15 module button.handlers.tracer.strace;
16 
17 version (linux):
18 
19 import button.log;
20 import button.resource;
21 import button.context;
22 
23 import io.file;
24 
/**
 * Accumulates the files read and written by a traced command by parsing the
 * log produced by `strace -f -o <file>`.
 *
 * Each log line is expected to start with the process ID followed by the
 * system call. Only `open`, `creat`, `rename`, `mkdir`, and `chdir` calls are
 * interpreted; every other line is skipped.
 */
private struct Trace
{
    private
    {
        import std.regex : regex;

        // Patterns for the system calls of interest. The capture groups are
        // the quoted path argument(s) and, for open, the flags expression.
        static re_open   = regex(`open\("([^"]*)", ([^,)]*)`);
        static re_creat  = regex(`creat\("([^"]*)",`);
        static re_rename = regex(`rename\("([^"]*)", "([^"]*)"\)`);
        static re_mkdir  = regex(`mkdir\("([^"]*)", (0[0-7]*)\)`);
        static re_chdir  = regex(`chdir\("([^"]*)"\)`);
    }

    /**
     * Paths that start with these fragments are ignored.
     */
    private static immutable ignoredPaths = [
        "/dev/",
        "/etc/",
        "/proc/",
        "/tmp/",
        "/usr/",
    ];

    /**
     * Returns: True if the given path should be ignored, false otherwise.
     *
     * The check is a plain prefix comparison against the path exactly as it
     * appears in the trace; the path is not resolved first.
     */
    private static bool ignorePath(const(char)[] path) pure nothrow
    {
        import std.algorithm.searching : startsWith;

        foreach (ignored; ignoredPaths)
        {
            if (path.startsWith(ignored))
                return true;
        }

        return false;
    }

    private
    {
        import std.container.rbtree;

        // Current working directories of each tracked process, keyed by
        // process ID. A process only gets an entry once it calls chdir.
        string[int] processes;

        // Paths seen so far. A path is never in both sets at the same time.
        RedBlackTree!string inputs, outputs;
    }

    /**
     * Appends the collected inputs and outputs to the given resource sets.
     *
     * Must be called after parse(), which is what creates the underlying
     * containers.
     */
    void dump(ref Resources implicitInputs, ref Resources implicitOutputs)
    {
        implicitInputs.put(inputs[]);
        implicitOutputs.put(outputs[]);
    }

    /**
     * Resolves a path seen in the trace against the tracked working directory
     * of the process that used it.
     *
     * Returns: The normalized path. If the process has no tracked working
     * directory, the path is only normalized.
     */
    string filePath(int pid, const(char)[] path)
    {
        import std.path : buildNormalizedPath;

        if (auto p = pid in processes)
            return buildNormalizedPath(*p, path);

        return buildNormalizedPath(path);
    }

    /**
     * Parses an strace log, replacing any previously collected inputs and
     * outputs.
     *
     * Lines that do not start with a process ID, or whose system call does
     * not match the expected pattern, are skipped.
     */
    void parse(File f)
    {
        import io.text;
        import std.conv : parse, ConvException;
        import std.string : stripLeft;
        import std.algorithm.searching : startsWith;
        import std.regex : matchFirst;

        inputs = redBlackTree!string();
        outputs = redBlackTree!string();

        foreach (line; f.byLine)
        {
            int pid;

            // Consumes the leading process ID from the line.
            try
                pid = line.parse!int();
            catch (ConvException e)
                continue;

            // Skip the padding between the process ID and the system call.
            // (std.string.munch is no longer available in current Phobos.)
            line = line.stripLeft();

            if (line.startsWith("open"))
            {
                auto captures = line.matchFirst(re_open);
                if (captures.empty)
                    continue;

                open(pid, captures[1], captures[2]);
            }
            else if (line.startsWith("creat"))
            {
                // Must use the creat pattern here: the open pattern can never
                // match a creat(...) line, which would silently drop outputs.
                auto captures = line.matchFirst(re_creat);
                if (captures.empty)
                    continue;

                creat(pid, captures[1]);
            }
            else if (line.startsWith("rename"))
            {
                auto captures = line.matchFirst(re_rename);
                if (captures.empty)
                    continue;

                rename(pid, captures[1], captures[2]);
            }
            else if (line.startsWith("mkdir"))
            {
                auto captures = line.matchFirst(re_mkdir);
                if (captures.empty)
                    continue;

                mkdir(pid, captures[1]);
            }
            else if (line.startsWith("chdir"))
            {
                auto captures = line.matchFirst(re_chdir);
                if (captures.empty)
                    continue;

                chdir(pid, captures[1]);
            }
        }
    }

    /**
     * Records an open() call. The first access-mode flag found in the
     * '|'-separated flags decides whether the path is an input or an output.
     */
    void open(int pid, const(char)[] path, const(char)[] flags)
    {
        import std.algorithm.iteration : splitter;

        if (ignorePath(path))
            return;

        foreach (flag; splitter(flags, '|'))
        {
            if (flag == "O_WRONLY" || flag == "O_RDWR")
            {
                // Opened in write mode. It's an output even if it was read
                // before.
                auto f = filePath(pid, path);
                inputs.removeKey(f);
                outputs.insert(f);
                break;
            }
            else if (flag == "O_RDONLY")
            {
                // Opened in read-only mode. It's an input unless it's already
                // an output. Consider the scenario of writing a new file and
                // then reading it back in. In such cases, the file should only
                // be considered an output.
                auto f = filePath(pid, path);
                if (f !in outputs)
                    inputs.insert(f);
                break;
            }
        }
    }

    /**
     * Records a creat() call. The created file is an output.
     */
    void creat(int pid, const(char)[] path)
    {
        if (ignorePath(path))
            return;

        outputs.insert(filePath(pid, path));
    }

    /**
     * Records a rename() call. The destination becomes an output and the
     * source stops being one.
     */
    void rename(int pid, const(char)[] from, const(char)[] to)
    {
        if (ignorePath(to))
            return;

        auto output = filePath(pid, to);
        outputs.removeKey(filePath(pid, from));
        inputs.removeKey(output);
        outputs.insert(output);
    }

    /**
     * Records a mkdir() call. The created directory is an output unless it
     * lives under one of the ignored path prefixes.
     */
    void mkdir(int pid, const(char)[] dir)
    {
        // Apply the same filter as open/creat/rename so that scratch
        // directories (e.g. under /tmp/) are not reported as outputs.
        if (ignorePath(dir))
            return;

        outputs.insert(filePath(pid, dir));
    }

    /**
     * Records a chdir() call by updating the tracked working directory of the
     * process.
     */
    void chdir(int pid, const(char)[] path)
    {
        // Resolve against the directory tracked so far so that consecutive
        // relative chdir calls accumulate instead of replacing each other.
        // An absolute path still overrides the previous directory. filePath
        // returns a freshly allocated string, so no copy of `path` is needed.
        processes[pid] = filePath(pid, path);
    }
}
217 
/**
 * Runs the given command under strace and adds the files it was observed
 * reading and writing to the given resource sets.
 *
 * The command is executed through the base handler with an strace prefix
 * that follows child processes and writes the trace to a temporary file. The
 * temporary file is removed when this function exits, whether normally or by
 * an exception.
 *
 * NOTE(review): only open, creat, rename, mkdir, and chdir are traced.
 * Programs that open files through openat will not show up in the trace --
 * confirm whether that matters for the targeted systems.
 *
 * Params:
 *   ctx      = Build context, forwarded to the base handler.
 *   args     = Command line to run under strace.
 *   workDir  = Working directory, forwarded to the base handler.
 *   inputs   = Receives the detected implicit inputs.
 *   outputs  = Receives the detected implicit outputs.
 *   logger   = Task logger, forwarded to the base handler.
 */
void execute(
        ref BuildContext ctx,
        const(string)[] args,
        string workDir,
        ref Resources inputs,
        ref Resources outputs,
        TaskLogger logger
        )
{
    import std.file : remove;

    import button.handlers.base : runUntraced = execute;

    // The trace goes to its own file so it cannot get mixed into the
    // command's own output.
    auto logPath = tempFile(AutoDelete.no).path;
    scope (exit) remove(logPath);

    // -f: follow child processes
    // -o: write the trace to the log file
    // -e: only trace the system calls we are interested in
    auto stracePrefix = [
        "strace",
        "-f",
        "-o", logPath,
        "-e", "trace=open,creat,rename,mkdir,chdir",
        ];

    auto command = stracePrefix ~ args;

    runUntraced(ctx, command, workDir, inputs, outputs, logger);

    // Turn the recorded system calls into dependencies.
    Trace trace;
    trace.parse(File(logPath));
    trace.dump(inputs, outputs);
}