From e9749a5383582b87d8140f2f44afcecdfab95b89 Mon Sep 17 00:00:00 2001 From: Tom Wilkie Date: Wed, 18 May 2016 14:08:05 +0100 Subject: [PATCH] Make scheduler aware of test parallelisation --- .gitignore | 2 ++ sched | 26 ++++++++++++++------------ scheduler/main.py | 15 ++++++++++++++- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index f5c83426c..b6ea60f8f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ cover/cover socks/proxy socks/image.tar runner/runner +*.pyc +*~ diff --git a/sched b/sched index e94e8af8f..cf47773e5 100755 --- a/sched +++ b/sched @@ -1,36 +1,38 @@ #!/usr/bin/python import sys, string, json, urllib import requests +import optparse -BASE_URL="http://positive-cocoa-90213.appspot.com" - -def test_time(test_name, runtime): - r = requests.post(BASE_URL + "/record/%s/%f" % (urllib.quote(test_name, safe=""), runtime)) +def test_time(target, test_name, runtime): + r = requests.post(target + "/record/%s/%f" % (urllib.quote(test_name, safe=""), runtime)) print r.text assert r.status_code == 204 -def test_sched(test_run, shard_count, shard_id): +def test_sched(target, test_run, shard_count, shard_id): tests = json.dumps({'tests': string.split(sys.stdin.read())}) - r = requests.post(BASE_URL + "/schedule/%s/%d/%d" % (test_run, shard_count, shard_id), data=tests) + r = requests.post(target + "/schedule/%s/%d/%d" % (test_run, shard_count, shard_id), data=tests) assert r.status_code == 200 result = r.json() for test in sorted(result['tests']): print test def usage(): - print "%s " % sys.argv[0] + print "%s (--target=...) " % sys.argv[0] print " time " print " sched " def main(): - if len(sys.argv) < 4: + parser = optparse.OptionParser() + parser.add_option('--target', default="http://positive-cocoa-90213.appspot.com") + options, args = parser.parse_args() + if len(args) < 3: usage() sys.exit(1) - if sys.argv[1] == "time": - test_time(sys.argv[2], float(sys.argv[3])) - elif sys.argv[1] == "sched": - test_sched(sys.argv[2], int(sys.argv[3]), int(sys.argv[4])) + if args[0] == "time": + test_time(options.target, args[1], float(args[2])) + elif args[0] == "sched": + test_sched(options.target, args[1], int(args[2]), int(args[3])) else: usage() diff --git a/scheduler/main.py b/scheduler/main.py index f509f0e1f..8907e202d 100644 --- a/scheduler/main.py +++ b/scheduler/main.py @@ -23,6 +23,19 @@ class Test(ndb.Model): total_run_time = ndb.FloatProperty(default=0.) # Not total, but a EWMA total_runs = ndb.IntegerProperty(default=0) + def parallelism(self): + name = self.key.string_id() + m = re.search('(\d+)_test.sh$', name) + if m is None: + return 1 + else: + return int(m.group(1)) + + def cost(self): + p = self.parallelism() + logging.info("Test %s has parallelism %d and avg run time %s", self.key.string_id(), p, self.total_run_time) + return self.parallelism() * self.total_run_time + class Schedule(ndb.Model): shards = ndb.JsonProperty() @@ -52,7 +65,7 @@ def schedule(test_run, shard_count, shard): test_times = ndb.get_multi(ndb.Key(Test, test_name) for test_name in test_names) def avg(test): if test is not None: - return test.total_run_time + return test.cost() return 1 test_times = [(test_name, avg(test)) for test_name, test in zip(test_names, test_times)] test_times_dict = dict(test_times)