|
#############################################################################
##
## This file is part of GAP, a system for computational discrete algebra.
## This file's authors include Frank Lübeck.
##
## Copyright of GAP belongs to its developers, whose names are too numerous
## to list here. Please refer to the COPYRIGHT file for details.
##
## SPDX-License-Identifier: GPL-2.0-or-later
##
## This file contains functions to compute echelon forms of matrices using
## multiple threads.
## It also contains some generic utility functions for writing such code
## (see ThreadFunction, CreateChannelsAndThreads,
## DestroyChannelsAndThreads, DistributeTasks, and their usage in
## TrigonalizeByThreads).
##
## (for the moment we only trigonalize, we can easily extend it to
## semi-echelon and full-echelon if we keep this code.)
##
## # some example input
## time := 0;
## m1 := RandomMat(500,500,GF(5));;
## m := List(m1,ShallowCopy);;
## res := TrigonalizeBySubsets(m,10,25);;time;
## m := List(m1,ShallowCopy);;
## t1:=CurrentTime();res1:=TrigonalizeByThreads(m,4,5,40);;t2:=CurrentTime();
##
# (very?) temporary helper, until ReceiveAnyChannel is available
#
# this version is bad, it eats up CPU cycles and doesn't provide a good
# load balancing
MyReceiveAnyChannel := function(l)
local res, ch;
while true do
for ch in l do
res := TryReceiveChannel(ch, fail);
if res <> fail then
return res;
fi;
od;
od;
end;
#############################################################################
##
## TrigonalizeSubset(mat, pivots, inds)
##
## Arguments: matrix mat with n rows;
## list pivots of length n, such that
## pivots[i] = PositionNonZero(mat[i]) or = 0;
## inds is a sublist of [1..n].
##
## We assume that pivots is monotonically increasing.
## TrigonalizeSubset computes destructively a trigonal form of the submatrix
## mat{inds} using row operations. (So with inds = [1..Length(mat)] the whole
## job would be done.)
##
## This function could easily be provided from the kernel for compressed
## matrices, but it is not so bad as it is.
##
## I have experimented with PositionNonZero, PositionNot, POSITION_NOT,
## but the fastest way to find the new pivots seems to be to use none of
## these. This is probably reasonable for dense matrices, but may not be
## true for very sparse matrices.
##
TrigonalizeSubset := function(mat, pivots, inds)
local t, ncols, zero, res, newpivots, done, piv, v, c, i, pos;
t := Runtime();
ncols := Length(mat[1]);
zero := Zero(mat[1][1]);
res := [];
newpivots := [];
done := [];
# populate result with rows with different pivots
if pivots[inds[1]] > 0 then
piv := 0;
for i in inds do
if pivots[i] > piv then
Add(res, mat[i]);
piv := pivots[i];
Add(newpivots, piv);
done[i] := true;
fi;
od;
fi;
for i in inds do
if not IsBound(done[i]) then
v := mat[i];
piv := pivots[i];
if piv = 0 then
# case of a not seen vector
piv := 1;
while piv <= ncols and v[piv] = zero do
piv := piv+1;
od;
fi;
for pos in [1..Length(res)] do
if newpivots[pos] = piv and piv <= ncols then
# can be reduced by a row in res
c := -v[piv]/res[pos][piv];
AddRowVector(v, res[pos], c);
piv := piv+1;
while piv <= ncols and v[piv] = zero do
piv := piv+1;
od;
elif newpivots[pos] >= piv then
Add(res, v, pos);
Add(newpivots, piv, pos);
break;
fi;
od;
if Length(res) = 0 or newpivots[pos] < piv then
Add(res, v);
Add(newpivots, piv);
fi;
fi;
od;
## We return res and newpivots and let the handler receiving the result
## write them into mat and pivots. Alternatively, we could do this here,
## return a dummy result and use a noop result handler.
## # write result into mat and pivots
## mat{inds} := res;
## pivots{inds} := newpivots;
Info(InfoDebug, 2, Runtime()-t, " ", Length(inds));
## return true;
return [res, newpivots, inds];
end;
#############################################################################
##
## This is an experimental function. Instead of trigonalizing in one step,
## we trigonalize num intervals of rows, then sort all rows such that the
## list of pivots in non-decreasing, and start over. This is done rounds
## times and afterwards the rest is done in a single interval.
## The nice point is here that the total running time is almost the same for
## a large range of values for num and rounds. This means we have split the
## whole work into many small tasks with neglectible overhead.
##
## I have played with various possibilities to vary the lengths of the
## intervals of rows given to TrigonalizeSubset during the algorithm.
## The best seems to be to keep these intervals the same all the time
## and do the cleaning interval-wise for about 2 * #intervals rounds.
## Then very little is left to do for the final cleanup.
##
TrigonalizeBySubsets := function(mat, num, rounds)
local len, pivots, round, ilen, intv, res, i;
len := Length(mat);
# initialize pivots as unknown
pivots := 0*[1..len];
round := 0;
repeat
# the last round must be with num = 1 to ensure a trigonal form
# in the result
if round = rounds then num := 1; fi;
# split rows of mat into num intervals and call TrigonalizeSubset
# for these
ilen := QuoInt(len, num);
for i in [1..num] do
if i < num then
intv := (i-1)*ilen+[1..ilen];
else
intv := [(num-1)*ilen+1..len];
fi;
res := TrigonalizeSubset(mat, pivots, intv);
mat{res[3]} := res[1];
pivots{res[3]} := res[2];
od;
# sort rows by pivot and start over
SortParallel(pivots,mat);
round := round+1;
until ilen=len;
return [mat, pivots];
end;
#############################################################################
##
## So, now lets do the same with several threads.
##
## We first give a straight forward variant TrigonalizeByThreads1.
##
## Looking at the code of TrigonalizeByThreads1 we see that in fact the whole
## mechanism has very little to do with the specific application.
##
## Therefore we provide some completely generic utility functions to start and
## destroy a number of threads and channels. And also a generic function which
## distributes tasks among such a collection of threads.
##
## Using this we can write an elegant and short function
## TrigonalizeByThreads.
##
TrigonalizeByThreads1 := function(mat, nthreads, mult, rounds)
local len, pivots, num, inchs, outchs, handler, threads, ilen, j, nres,
intv, res, i, round, k;
len := Length(mat);
pivots := 0*[1..len];
num := mult * nthreads;
inchs := List([1..nthreads], i-> CreateChannel());
outchs := List([1..nthreads], i-> CreateChannel());
# the loop for each thread
handler := function(i)
local inch, outch, task, res;
inch := inchs[i];
outch := outchs[i];
while true do
task := ReceiveChannel(inch);
# this means we are done
if task = 0 then
return;
fi;
IsRange(task[3]);
Print("Th ", i,": ",task[3],"\n");
res := CallFuncList(TrigonalizeSubset, task);
# we add i to indicate which thread is free
Add(res, i);
SendChannel(outch, res);
od;
end;
threads := [];
for i in [1..nthreads] do
threads[i] := CreateThread(handler, i);
od;
for round in [1..rounds] do
ilen := QuoInt(len, num);
# we start with the last interval since this seems to take longer sometimes
j := num;
nres := 0;
intv := [(num-1)*ilen+1..len];
# first feed all threads
for k in [1..nthreads] do
SendChannel(inchs[k], [mat, pivots, intv]);
j := j-1;
intv := (j-1)*ilen+[1..ilen];
od;
while j > 0 do
res := MyReceiveAnyChannel(outchs);
nres := nres+1;
mat{res[3]} := res[1];
pivots{res[3]} := res[2];
SendChannel(inchs[res[4]], [mat,pivots,intv]);
j := j-1;
intv := (j-1)*ilen+[1..ilen];
od;
while nres < num do
res := MyReceiveAnyChannel(outchs);
nres := nres+1;
mat{res[3]} := res[1];
pivots{res[3]} := res[2];
od;
# now this round is done and we sort
SortParallel(pivots,mat);
od;
# the final cleanup is single threaded
for i in [1..nthreads] do
# close threads and channels
SendChannel(inchs[i], 0);
WaitThread(threads[i]);
DestroyChannel(inchs[i]);
DestroyChannel(outchs[i]);
od;
intv := [1..len];
res := TrigonalizeSubset(mat, pivots, intv);
mat{intv} := res[1];
pivots{intv} := res[2];
return [mat, pivots];
end;
#############################################################################
##
## This is a generic function to start a thread. It receives tasks to do from
## an input channel.
## A task is a pair [workfunc, args] of a function and the list of its
## arguments, the function must return something.
## The result res = CallFuncList(workfunc, args) is sent into the output
## channel in the form [res, id], such that id allows to identify this thread.
##
## The task can also be 0 and this tells the thread to terminate.
##
ThreadFunction := function(id, inch, outch)
local task, res;
while true do
# the tasks we get are inputs to TrigonalizeSubset or 0 to finish
task := ReceiveChannel(inch);
# a task 0 means we are done
if task = 0 then
return;
fi;
res := CallFuncList(task[1], task[2]);
# we return res and id such that the master knows which thread is free
SendChannel(outch, [res,id]);
od;
end;
# create n threads with n input and n output channels
CreateChannelsAndThreads := function(n)
local inchs, outchs, threads, i;
inchs := List([1..n], i-> CreateChannel());
outchs := List([1..n], i-> CreateChannel());
# now start threads
threads := [];
for i in [1..n] do
threads[i] := CreateThread(ThreadFunction, i, inchs[i], outchs[i]);
od;
return [threads, inchs, outchs];
end;
# and destroy them, arg is output of last function
DestroyChannelsAndThreads := function(chths)
local n, i;
n := Length(chths[1]);
for i in [1..n] do
# send stop tasks
SendChannel(chths[2][i], 0);
od;
for i in [1..n] do
WaitThread(chths[1][i]);
DestroyChannel(chths[2][i]);
DestroyChannel(chths[3][i]);
od;
end;
#############################################################################
##
## With this function we distribute a list of tasks among a list of threads,
## created with the functions above.
##
## Arguments:
## threads is list of threads,
## inchs and outchs the same number of channels,
## tasks can be a list or an enumerator,
## ntasks the number of tasks, and
## handler a function that is applied to the results of each task.
##
DistributeTasks := function(threads, inchs, outchs, tasks, ntasks, handler)
local nthreads, nres, res, i;
nthreads := Length(threads);
nres := 0;
for i in [1..nthreads] do
# first feed all threads
SendChannel(inchs[i], tasks[i]);
od;
for i in [nthreads+1..ntasks] do
# wait for a thread sending the result and feed it with the next task
res := MyReceiveAnyChannel(outchs);
nres := nres+1;
# send the next task to this thread number res[2]
SendChannel(inchs[res[2]], tasks[i]);
# call the handler function
handler(res[1]);
od;
# all tasks are sent away, now collect the remaining results
while nres < ntasks do
res := MyReceiveAnyChannel(outchs);
nres := nres+1;
handler(res[1]);
od;
end;
#############################################################################
##
## With these utilities it is easy to write a threaded version
## of TrigonalizeBySubsets.
##
TrigonalizeByThreads := function(mat, nthreads, mult, rounds)
local len, pivots, num, qr, intervals, e, b, thchs, res, round, i;
len := Length(mat);
pivots := 0*[1..len];
num := mult * nthreads;
# find len/num intervals of [1..len] (is there an elegant one-liner?)
qr := QuotientRemainder(len, num);
intervals := [];
e := len; b := len-qr[1]+1;
while b >= 0 do
Add(intervals, [b..e]);
e := b-1;
if qr[2] > 0 then
b := e-qr[1];
qr[2] := qr[2]-1;
else
b := e-qr[1]+1;
fi;
od;
# create the threads
thchs := CreateChannelsAndThreads(nthreads);
for round in [1..rounds] do
DistributeTasks(thchs[1], thchs[2], thchs[3], List(intervals, intv ->
[TrigonalizeSubset, [mat, pivots, intv]]), num,
function(res) mat{res[3]} := res[1]; pivots{res[3]} := res[2]; end);
# do we need an explicit memory barrier here?
SortParallel(pivots,mat);
od;
# clean out the rest
res := TrigonalizeSubset(mat, pivots, [1..len]);
# That is it for now. But we can easily add some code here to also cover
# the cases of semi-echelon form and full-echelon form computations.
# ...
# destroys channels and threads
DestroyChannelsAndThreads(thchs);
return [mat, pivots];
end;
[ Dauer der Verarbeitung: 0.34 Sekunden
(vorverarbeitet)
]
|