1 /**
2  * Copyright: Copyright Jason White, 2016
3  * License:   MIT
4  * Authors:   Jason White
5  *
6  * Description:
7  * The tracer traces system calls to determine inputs and outputs. This is very
8  * slow and should only be used as a last resort when there are no other
9  * suitable handlers.
10  *
11  * FIXME: Implement this using ptrace directly. This will eliminate the
12  * dependency on strace, as it is not installed by default. It will also
13  * eliminate the small amount of overhead of spawning an extra process.
14  */
15 module button.handlers.tracer.strace;
16 
17 version (linux):
18 
19 import button.resource;
20 import button.context;
21 
22 import io.file;
23 
/**
 * Accumulates the implicit inputs and outputs of a command by reading an
 * strace log (as written by `strace -f -o <file>`).
 *
 * Each log line is expected to start with the ID of the process that made the
 * system call, followed by the system call itself. Only `open`, `creat`,
 * `rename`, `mkdir` and `chdir` are interpreted; every other line is skipped.
 *
 * `parse` must be called before any other method: it is the only place where
 * the `inputs` and `outputs` sets are allocated.
 */
private struct Trace
{
    private
    {
        import std.regex : regex;

        // One pattern per interpreted system call. Capture 1 is always the
        // (first) path argument.
        static re_open   = regex(`open\("([^"]*)", ([^,)]*)`);
        static re_creat  = regex(`creat\("([^"]*)",`);
        static re_rename = regex(`rename\("([^"]*)", "([^"]*)"\)`);
        static re_mkdir  = regex(`mkdir\("([^"]*)", (0[0-7]*)\)`);
        static re_chdir  = regex(`chdir\("([^"]*)"\)`);
    }

    /**
     * Paths that start with these fragments are ignored.
     */
    private static immutable ignoredPaths = [
        "/dev/",
        "/etc/",
        "/proc/",
        "/tmp/",
        "/usr/",
    ];

    /**
     * Returns: True if the given path should be ignored, false otherwise.
     *
     * The check is a plain prefix comparison against `ignoredPaths`; the path
     * is used exactly as it appears in the trace (it is not resolved first).
     */
    private static bool ignorePath(const(char)[] path) pure nothrow
    {
        import std.algorithm.searching : startsWith;

        foreach (ignored; ignoredPaths)
        {
            if (path.startsWith(ignored))
                return true;
        }

        return false;
    }

    private
    {
        import std.container.rbtree;

        // Current working directories of each tracked process. A process only
        // gets an entry once it has been seen calling chdir.
        string[int] processes;

        // Normalized paths that were read from / written to.
        RedBlackTree!string inputs, outputs;
    }

    /**
     * Appends all collected inputs and outputs to the given resource lists.
     *
     * Params:
     *   implicitInputs  = Receives every path recorded as an input.
     *   implicitOutputs = Receives every path recorded as an output.
     */
    void dump(ref Resources implicitInputs, ref Resources implicitOutputs)
    {
        implicitInputs.put(inputs[]);
        implicitOutputs.put(outputs[]);
    }

    /**
     * Resolves a path seen in the trace of the given process.
     *
     * Params:
     *   pid  = ID of the process that used the path.
     *   path = Path exactly as it appeared in the system call.
     *
     * Returns: The normalized path. If a working directory has been recorded
     * for `pid`, the path is joined onto it first; otherwise the path is only
     * normalized.
     */
    string filePath(int pid, const(char)[] path)
    {
        import std.path : buildNormalizedPath;

        if (auto p = pid in processes)
            return buildNormalizedPath(*p, path);

        return buildNormalizedPath(path);
    }

    /**
     * Reads an strace log line by line and records the inputs and outputs it
     * describes. Any state from a previous call is discarded.
     *
     * Lines that do not start with a process ID, or whose system call does
     * not match the expected pattern, are skipped.
     *
     * Params:
     *   f = The trace log to read.
     */
    void parse(File f)
    {
        import io.text;
        import std.conv : parse, ConvException;
        import std.string : stripLeft;
        import std.algorithm.searching : startsWith;
        import std.regex : matchFirst;

        // Start from a clean slate, including the per-process working
        // directories, so that a second parse is not affected by the first.
        inputs = redBlackTree!string();
        outputs = redBlackTree!string();
        processes = null;

        foreach (line; f.byLine)
        {
            int pid;

            // std.conv.parse consumes the leading number from `line`, leaving
            // the remainder of the line for the system call matching below.
            try
                pid = line.parse!int();
            catch (ConvException e)
                continue;

            line = line.stripLeft(" \t");

            if (line.startsWith("open"))
            {
                auto captures = line.matchFirst(re_open);
                if (captures.empty)
                    continue;

                open(pid, captures[1], captures[2]);
            }
            else if (line.startsWith("creat"))
            {
                // Must be matched with the creat pattern; the open pattern
                // can never match a creat(...) line.
                auto captures = line.matchFirst(re_creat);
                if (captures.empty)
                    continue;

                creat(pid, captures[1]);
            }
            else if (line.startsWith("rename"))
            {
                auto captures = line.matchFirst(re_rename);
                if (captures.empty)
                    continue;

                rename(pid, captures[1], captures[2]);
            }
            else if (line.startsWith("mkdir"))
            {
                auto captures = line.matchFirst(re_mkdir);
                if (captures.empty)
                    continue;

                mkdir(pid, captures[1]);
            }
            else if (line.startsWith("chdir"))
            {
                auto captures = line.matchFirst(re_chdir);
                if (captures.empty)
                    continue;

                chdir(pid, captures[1]);
            }
        }
    }

    /**
     * Records an `open` system call.
     *
     * Params:
     *   pid   = ID of the calling process.
     *   path  = Path that was opened.
     *   flags = The flags argument as printed by strace, e.g.
     *           `O_WRONLY|O_CREAT|O_TRUNC`.
     */
    void open(int pid, const(char)[] path, const(char)[] flags)
    {
        import std.algorithm.iteration : splitter;

        if (ignorePath(path))
            return;

        // Only the first access-mode flag found decides the classification.
        foreach (flag; splitter(flags, '|'))
        {
            if (flag == "O_WRONLY" || flag == "O_RDWR")
            {
                // Opened in write mode. It's an output even if it was read
                // before.
                auto f = filePath(pid, path);
                inputs.removeKey(f);
                outputs.insert(f);
                break;
            }
            else if (flag == "O_RDONLY")
            {
                // Opened in read-only mode. It's an input unless it's already
                // an output. Consider the scenario of writing a new file and
                // then reading it back in. In such cases, the file should only
                // be considered an output.
                auto f = filePath(pid, path);
                if (f !in outputs)
                    inputs.insert(f);
                break;
            }
        }
    }

    /**
     * Records a `creat` system call: the created path is an output.
     *
     * Params:
     *   pid  = ID of the calling process.
     *   path = Path that was created.
     */
    void creat(int pid, const(char)[] path)
    {
        if (ignorePath(path))
            return;

        outputs.insert(filePath(pid, path));
    }

    /**
     * Records a `rename` system call. The source stops being an output and
     * the destination becomes one (and stops being an input).
     *
     * Params:
     *   pid  = ID of the calling process.
     *   from = Path that was renamed.
     *   to   = New path.
     */
    void rename(int pid, const(char)[] from, const(char)[] to)
    {
        // The source path no longer exists after the rename, no matter where
        // it was moved to, so drop it even if the destination is ignored.
        outputs.removeKey(filePath(pid, from));

        if (ignorePath(to))
            return;

        auto output = filePath(pid, to);
        inputs.removeKey(output);
        outputs.insert(output);
    }

    /**
     * Records a `mkdir` system call: the created directory is an output.
     *
     * Params:
     *   pid = ID of the calling process.
     *   dir = Directory that was created.
     */
    void mkdir(int pid, const(char)[] dir)
    {
        // Apply the same filter as the other output-producing calls.
        if (ignorePath(dir))
            return;

        outputs.insert(filePath(pid, dir));
    }

    /**
     * Records a `chdir` system call by updating the working directory that
     * is tracked for the process.
     *
     * Params:
     *   pid  = ID of the calling process.
     *   path = Directory that was changed to.
     */
    void chdir(int pid, const(char)[] path)
    {
        // Resolve against the directory recorded so far, so that a relative
        // chdir following an earlier chdir lands in the right place. An
        // absolute path replaces the previous directory entirely.
        processes[pid] = filePath(pid, path);
    }
}
216 
/**
 * Runs a command under strace and adds the files it was observed reading and
 * writing to the given input and output lists.
 *
 * Params:
 *   ctx     = Build context, passed through to the base handler.
 *   args    = Command line to run.
 *   workDir = Working directory, passed through to the base handler.
 *   inputs  = Receives the inputs discovered in the trace.
 *   outputs = Receives the outputs discovered in the trace.
 */
void execute(
        ref BuildContext ctx,
        const(string)[] args,
        string workDir,
        ref Resources inputs,
        ref Resources outputs
        )
{
    import std.file : remove;

    import button.handlers.base : runCommand = execute;

    // The log lives in a temporary file that is cleaned up on every way out
    // of this function.
    auto logPath = tempFile(AutoDelete.no).path;
    scope (exit) remove(logPath);

    // strace options:
    //   -f       follow child processes
    //   -o FILE  write the trace to a file so it does not mix with the
    //            command's own output
    //   -e ...   restrict tracing to the system calls that get interpreted
    auto command = [
        "strace",
        "-f",
        "-o", logPath,
        "-e", "trace=open,creat,rename,mkdir,chdir",
        ] ~ args;

    runCommand(ctx, command, workDir, inputs, outputs);

    // Turn the recorded system calls into dependencies.
    Trace trace;
    trace.parse(File(logPath));
    trace.dump(inputs, outputs);
}