I have a process for a sort of genetic nesting algorithm that I am trying to multi-thread. The process looks something like the following.
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace ConsoleApp1
{
class Program
{
static void Main(string[] args)
{
CurrentNest = new CuttingRun();
for (int i = 0; i < 80; i++)
{
double w = GetRandomNumber(24, 50);
double h = GetRandomNumber(10, 15);
CurrentNest.PartList.Add(new LwCube { Width = w, Height = h, PartID = i });
}
//Task.Run(() =>
//{
// Parallel.For(0, 64, (i) => Parallel_Nest());
//});
while (true)
{
Parallel_Nest();
}
//Console.ReadKey();
}
public static double GetRandomNumber(double minimum, double maximum)
{
Random random = new Random();
return random.NextDouble() * (maximum - minimum) + minimum;
}
public static CuttingRun CurrentNest { get; set; }
public static void Parallel_Nest()
{
Random random = new Random();
int randomNumber = random.Next(2000, 10000);
var retVal = Nester.Nest_Parts(CurrentNest, randomNumber);
CurrentNest.Iterations++;
if (CurrentNest.SavedList.Count > 0)
{
if (retVal.Count < CurrentNest.SavedList.Count)
{
CurrentNest.SavedList = retVal;
}
}
else
{
CurrentNest.SavedList = retVal;
}
Console.WriteLine(CurrentNest.Iterations.ToString() + " " + CurrentNest.SavedList.Count.ToString());
if (CurrentNest.SavedList != retVal)
{
retVal.Clear();
}
}
}
//Models
public class LwSheet
{
public LwSheet(double width, double height)
{
SheetWidth = width;
SheetHeight = height;
FreeRectangles.Add(new LwCube { Width = width, Height = height, X = 0, Y = 0 });
}
public List<LwCube> UsedRectangles = new List<LwCube>();
public List<LwCube> FreeRectangles = new List<LwCube>();
public double SheetWidth { get; set; }
public double SheetHeight { get; set; }
public double TotalUsed { get; set; }
public bool Place_Part(LwCube prt)
{
bool retVal = false;
LwCube bestNode = FindPositionForBestAreaFit(prt);
//if the bestNode has a height then add our parts to the list
if (bestNode.Height > 0)
{
bestNode.PartID = prt.PartID;
int numRectanglesToProcess = FreeRectangles.Count;
for (int i = 0; i < numRectanglesToProcess; ++i)
{
if (SplitFreeNode(FreeRectangles[i], ref bestNode))
{
FreeRectangles.RemoveAt(i);
--i;
--numRectanglesToProcess;
}
}
PruneFreeList();
UsedRectangles.Add(bestNode);
retVal = true;
}
return retVal;
}
bool SplitFreeNode(LwCube freeNode, ref LwCube usedNode)
{
// Test with SAT if the rectangles even intersect.
if (usedNode.X >= freeNode.X + freeNode.Width || usedNode.X + usedNode.Width <= freeNode.X ||
usedNode.Y >= freeNode.Y + freeNode.Height || usedNode.Y + usedNode.Height <= freeNode.Y)
return false;
if (usedNode.X < freeNode.X + freeNode.Width && usedNode.X + usedNode.Width > freeNode.X)
{
// New node at the top side of the used node.
if (usedNode.Y > freeNode.Y && usedNode.Y < freeNode.Y + freeNode.Height)
{
LwCube newNode = new LwCube { Width = freeNode.Width, X = freeNode.X, Y = freeNode.Y };
newNode.Height = usedNode.Y - newNode.Y;
FreeRectangles.Add(newNode);
}
// New node at the bottom side of the used node.
if (usedNode.Y + usedNode.Height < freeNode.Y + freeNode.Height)
{
LwCube newNode = new LwCube { Width = freeNode.Width, X = freeNode.X };
newNode.Y = usedNode.Y + usedNode.Height;
newNode.Height = freeNode.Y + freeNode.Height - (usedNode.Y + usedNode.Height);
FreeRectangles.Add(newNode);
}
}
if (usedNode.Y < freeNode.Y + freeNode.Height && usedNode.Y + usedNode.Height > freeNode.Y)
{
// New node at the left side of the used node.
if (usedNode.X > freeNode.X && usedNode.X < freeNode.X + freeNode.Width)
{
LwCube newNode = new LwCube { Height = freeNode.Height, X = freeNode.X, Y = freeNode.Y };
newNode.Width = usedNode.X - newNode.X;
FreeRectangles.Add(newNode);
}
// New node at the right side of the used node.
if (usedNode.X + usedNode.Width < freeNode.X + freeNode.Width)
{
LwCube newNode = new LwCube { Height = freeNode.Height, Y = freeNode.Y };
newNode.X = usedNode.X + usedNode.Width;
newNode.Width = freeNode.X + freeNode.Width - (usedNode.X + usedNode.Width);
FreeRectangles.Add(newNode);
}
}
return true;
}
void PruneFreeList()
{
for (int i = 0; i < FreeRectangles.Count; ++i)
for (int j = i + 1; j < FreeRectangles.Count; ++j)
{
if (IsContainedIn(FreeRectangles[i], FreeRectangles[j]))
{
FreeRectangles.RemoveAt(i);
--i;
break;
}
if (IsContainedIn(FreeRectangles[j], FreeRectangles[i]))
{
FreeRectangles.RemoveAt(j);
--j;
}
}
}
bool IsContainedIn(LwCube a, LwCube b)
{
return a.X >= b.X && a.Y >= b.Y
&& a.X + a.Width <= b.X + b.Width
&& a.Y + a.Height <= b.Y + b.Height;
}
LwCube FindPositionForBestAreaFit(LwCube prt)
{
LwCube bestNode = new LwCube();
var bestAreaFit = SheetWidth * SheetHeight;
for (int i = 0; i < FreeRectangles.Count; ++i)
{
double areaFit = FreeRectangles[i].Width * FreeRectangles[i].Height - prt.Width * prt.Height;
// Try to place the rectangle in upright (non-flipped) orientation.
if (FreeRectangles[i].Width >= prt.Width && FreeRectangles[i].Height >= prt.Height)
{
if (areaFit < bestAreaFit)
{
bestNode.X = FreeRectangles[i].X;
bestNode.Y = FreeRectangles[i].Y;
bestNode.Height = prt.Height;
bestNode.Width = prt.Width;
bestAreaFit = areaFit;
}
}
}
return bestNode;
}
}
public class LwCube
{
public int PartID { get; set; }
public double Width { get; set; }
public double Height { get; set; }
public double X { get; set; }
public double Y { get; set; }
}
public class CuttingRun
{
public List<LwCube> PartList = new List<LwCube>();
public List<LwSheet> SavedList = new List<LwSheet>();
public List<LwSheet> Sheets = new List<LwSheet>();
public int Iterations { get; set; }
}
//Actions
public static class Nester
{
public static List<LwSheet> Nest_Parts(CuttingRun cuttingRun, int loopCount)
{
var SheetList = new List<LwSheet>();
List<LwCube> partList = new List<LwCube>();
partList.AddRange(cuttingRun.PartList);
while (partList.Count > 0)
{
LwSheet newScore = new LwSheet(97, 49);
List<LwCube> addingParts = new List<LwCube>();
foreach (var prt in partList)
{
addingParts.Add(new LwCube { Width = prt.Width, Height = prt.Height, PartID = prt.PartID });
}
if (addingParts.Count > 0)
{
var sheets = new ConcurrentBag<LwSheet>();
Parallel.For(0, loopCount, (i) =>
{
var hmr = new LwSheet(97, 49);
Add_Parts_To_Sheet(hmr, addingParts);
sheets.Add(hmr);
});
//for (int i = 0; i < loopCount; i++)
//{
// var hmr = new LwSheet(97, 49);
// Add_Parts_To_Sheet(hmr, addingParts, addToLarge, addToMedium);
// sheets.Add(hmr);
//}
addingParts.Clear();
var bestSheet = sheets.Where(p => p != null).OrderByDescending(p => p.TotalUsed).First();
sheets = null;
newScore = bestSheet;
foreach (var ur in newScore.UsedRectangles)
{
partList.Remove(partList.Single(p => p.PartID == ur.PartID));
}
SheetList.Add(newScore);
}
}
return SheetList;
}
public static void Add_Parts_To_Sheet(LwSheet sh, List<LwCube> parts)
{
var myList = new List<LwCube>();
myList.AddRange(parts);
myList.Shuffle();
foreach (var prt in myList)
{
sh.Place_Part(prt);
}
myList.Clear();
foreach (var ur in sh.UsedRectangles)
{
sh.TotalUsed += ur.Width * ur.Height;
}
}
[ThreadStatic] private static Random Local;
public static Random ThisThreadsRandom
{
get { return Local ?? (Local = new Random(unchecked(Environment.TickCount * 31 + System.Threading.Thread.CurrentThread.ManagedThreadId))); }
}
public static void Shuffle<T>(this IList<T> list)
{
int n = list.Count;
while (n > 1)
{
n--;
int k = ThisThreadsRandom.Next(n + 1);
T value = list[k];
list[k] = list[n];
list[n] = value;
}
}
}
}
I have tried using parallel for loops on each of the loops to try and speed up the process. I have also tried changing them to tasks and used task.WhenAll. However I am only able to use around 25% of my CPU. If I start the program 4 different times, I can use 100%.
I am wondering if anyone has any ideas on how I could use 100% of my CPU without starting the program more than once?
EDIT: After adding a scaled down working version I also commented out one of the parallel loops and one of the normal loops to show where I put them in the code.
However I am only able to use around 25% of my CPU. If I start the program 4 different times, I can use 100%. I am wondering if anyone has any ideas on how I could use 100% of my CPU without starting the program more than once?
Your code appears to be a mixture of asynchronous (presumably I/O-bound) and parallel (presumably CPU-bound) portions. I say "appears to be" because we can't say for sure where the problem is since this is not a minimal reproducible example.
But, if that assumption is correct, then the reason your CPU is underutilized is simple: the parallel CPU-bound portions are waiting for their input data from the asynchronous I/O-bound portions. The only way to fix that is to run the I/O-bound portions concurrently. Move your I/O-bound code as early in the pipeline as possible, and then be sure to run the I/O-bound portions as concurrently as possible. E.g., if you have to call a WebApi for each item, call it as soon as you have the item; or if you're reading items from a database, try to read as many in a batch as possible. This is to minimize the amount of time that the CPU-bound portions have to wait for their data.
"Asynchronous Parallel ForEach" is rarely a good tool for this kind of problem. I would either look into TPL Dataflow or build your own pipeline using Channels.
At the end of the day, it is possible that the algorithm as a whole is I/O-bound. In that case, there isn't a whole lot you can do: only one CPU would be used because the I/O couldn't even keep up with that single CPU, and in that case using more CPUs wouldn't provide any benefit.
my task is, to sum up numbers in some range, to achieve that I have to use threads to separate computation.
I divided number to parts and used a thread for each part.
public class ParallelCalc
{
public long resultLong;
private Thread[] threads;
private List<long> list = new List<long>();
public long MaxNumber { get; set; }
public int ThreadsNumber { get; set; }
public event CalcFinishedEventHandler finished;
public ParallelCalc(long MaxNumber, int ThreadsNumber)
{
this.MaxNumber = MaxNumber;
this.ThreadsNumber = ThreadsNumber;
this.threads = new Thread[ThreadsNumber];
}
public void Start()
{
Stopwatch sw = new Stopwatch();
for (int i = 0; i < ThreadsNumber; i++)
{
threads[i] = new Thread(() => Sum(((MaxNumber / ThreadsNumber) * i) + 1,
MaxNumber / ThreadsNumber * (i + 1)));
if (i == ThreadsNumber - 1)
{
threads[i] = new Thread(() => Sum(((MaxNumber / ThreadsNumber) * i) + 1,
MaxNumber));
}
sw.Start();
threads[i].Start();
}
while (threads.All(t => t.IsAlive));
sw.Stop();
finished?.Invoke(this,
new CalcFinishedEventArgs()
{
Result = list.Sum(),
Time = sw.ElapsedMilliseconds
});
}
private void Sum(long startNumber, long endnumber)
{
long result = 0;
for (long i = startNumber; i <= endnumber; i++)
{
result += i;
}
list.Add(result);
}
}
The result has to be the sum of numbers, however, it is incorrect due to thread asynchronous assignment in list. Please indicate the error.
There is more than one thing wrong here, brace yourself...
Start creates a Stopwatch sw, but you call sw.Start on every iteration of the loop. Start it only once.
if i == ThreadsNumber - 1 evaluates to true, you let Thread to garbage. I fail to grasp why...
(MaxNumber / ThreadsNumber) * (i + 1) WHEN i == ThreadsNumber - 1
=
(MaxNumber / ThreadsNumber) * (ThreadsNumber - 1 + 1)
=
(MaxNumber / ThreadsNumber) * (ThreadsNumber)
=
MaxNumber
Do you have rounding problems? Rewrite like this:
((i + 1) * MaxNumber) / ThreadsNumber
By dividing last, you avoid the rounding problem.
You are spin waiting on the threads while (threads.All(t => t.IsAlive));. You could as well use Thread.Join or better yet, let the threads notify you when they are done.
The ranges in the lambdas have a closure on i. You need to be careful with C# - For loop and the lambda expressions.
List<T> is not thread safe. I would suggest to use a simple array (you know the number of threads afterall) and tell each thread to store only on the position that corresponds to them.
You have not considered what would happen if a second call to Start happens before the first one finishes.
So, we will have an array for the output:
var output = new long[ThreadsNumber];
And one for the Threads:
var threads = new Thread[ThreadsNumber];
Hmm, almost like we should create a class.
We will have the stopwatch:
var sw = new Stopwatch();
Let us start it once:
sw.Start();
Now a for to create the Threads:
for (var i = 0; i < ThreadsNumber; i++)
{
// ...
}
Have a copy of i to prevent problems:
for (var i = 0; i < ThreadsNumber; i++)
{
var index = i;
// ...
}
Compute the range for the current thread:
for (var i = 0; i < ThreadsNumber; i++)
{
var index = i;
var start = 1 + (i * MaxNumber) / ThreadsNumber;
var end = ((i + 1) * MaxNumber) / ThreadsNumber;
// ...
}
We need to write Sum in such way that we can store the output in the array:
private void Sum(long startNumber, long endNumber, int index)
{
long result = 0;
for (long i = startNumber; i <= endnumber; i++)
{
result += i;
}
output[index] = result;
}
Hmm... wait, there is a better way...
private static void Sum(long startNumber, long endNumber, out long output)
{
long result = 0;
for (long i = startNumber; i <= endNumber; i++)
{
result += i;
}
output = result;
}
Hmm... no, we can do better...
private static long Sum(long startNumber, long endNumber)
{
long result = 0;
for (long i = startNumber; i <= endNumber; i++)
{
result += i;
}
return result;
}
Create the Thread
for (var i = 0; i < ThreadsNumber; i++)
{
var index = i;
var start = 1 + (i * MaxNumber) / ThreadsNumber;
var end = ((i + 1) * MaxNumber) / ThreadsNumber;
threads[i] = new Thread(() => output[index] = Sum(start, end));
// ...
}
And start the Thread:
for (var i = 0; i < ThreadsNumber; i++)
{
var index = i;
var start = 1 + (i * MaxNumber) / ThreadsNumber;
var end = ((i + 1) * MaxNumber) / ThreadsNumber;
threads[i] = new Thread(() => {output[index] = Sum(start, end);});
threads[i].Start();
}
Are we really going to wait on these?
Think, think...
We keep track of how many threads are pending... and when they are all done, we call the event (and stop the Stopwatch).
var pendingThreads = ThreadsNumber;
// ...
for (var i = 0; i < ThreadsNumber; i++)
{
// ...
threads[i] = new Thread
(
() =>
{
output[index] = Sum(start, end);
if (Interlocked.Decrement(ref pendingThreads) == 0)
{
sw.Stop();
finished?.Invoke
(
this,
new CalcFinishedEventArgs()
{
Result = output.Sum(),
Time = sw.ElapsedMilliseconds
}
);
}
}
);
// ...
}
Let us bring it all togheter:
void Main()
{
var pc = new ParallelCalc(20, 5);
pc.Finished += (sender, args) =>
{
Console.WriteLine(args);
};
pc.Start();
}
public class CalcFinishedEventArgs : EventArgs
{
public long Result {get; set;}
public long Time {get; set;}
}
public class ParallelCalc
{
public long MaxNumber { get; set; }
public int ThreadsNumber { get; set; }
public event EventHandler<CalcFinishedEventArgs> Finished;
public ParallelCalc(long MaxNumber, int ThreadsNumber)
{
this.MaxNumber = MaxNumber;
this.ThreadsNumber = ThreadsNumber;
}
public void Start()
{
var output = new long[ThreadsNumber];
var threads = new Thread[ThreadsNumber];
var pendingThreads = ThreadsNumber;
var sw = new Stopwatch();
sw.Start();
for (var i = 0; i < ThreadsNumber; i++)
{
var index = i;
var start = 1 + (i * MaxNumber) / ThreadsNumber;
var end = ((i + 1) * MaxNumber) / ThreadsNumber;
threads[i] = new Thread
(
() =>
{
output[index] = Sum(start, end);
if (Interlocked.Decrement(ref pendingThreads) == 0)
{
sw.Stop();
Finished?.Invoke
(
this,
new CalcFinishedEventArgs()
{
Result = output.Sum(),
Time = sw.ElapsedMilliseconds
}
);
}
}
);
threads[i].Start();
}
}
private static long Sum(long startNumber, long endNumber)
{
long result = 0;
for (long i = startNumber; i <= endNumber; i++)
{
result += i;
}
return result;
}
}
Output:
Result
210
Time
0
That is too fast... let me input:
var pc = new ParallelCalc(2000000000, 5);
pc.Finished += (sender, args) =>
{
Console.WriteLine(args);
};
pc.Start();
Output:
Result
2000000001000000000
Time
773
And that is correct.
And yes, this code takes care of the case of calling Start multiple times. Notice that it create a new array for the output and a new array of threads each time. That way, it does not trip over itself.
I let error handling to you. Hints: MaxNumber / ThreadsNumber -> division by 0, and (i + 1) * MaxNumber -> overflow, not to mention output.Sum() -> overflow.
Good afternoon guys, I have a following problem of the school to solve, being it in relation to Collatz Problem, where I need to develop an application that will find out the starting number between 1 and 1 million that produces the largest sequence.
I made the following code without applying LINQ, and would like to know how to use linq in this situation.
var sequence_size = 0;
var best_number = 0;
var sequence = 0;
for (var i = 0; i <= 1000000; i ++)
{
var size_ = 1;
sequence = i;
while (sequence! = 1)
{
sequence = sequence% 2 == 0? sequence / 2: sequence * 3 + 1;
size ++;
}
if (size> size)
{
size_sequence = size;
best_number = i;
}
}
This is what I'd recommend:
public class MaxSequenceFinder
{
public int FindBestCollatzSequence(int start, int count)
{
return Enumerable.Range(start, count)
.Select(n => new { Number = n, SequenceLength = CalcCollatzSequence((long)n) })
.Aggregate((i, j) => i.SequenceLength > j.SequenceLength ? i : j)
.Number;
}
private int CalcCollatzSequence(long n)
{
int sequenceLength = 0;
do
{
n = CalcNextTerm(n);
sequenceLength++;
}
while (n != 1);
return sequenceLength;
}
private long CalcNextTerm(long previousTerm)
{
return previousTerm % 2 == 0 ? previousTerm / 2 : previousTerm * 3 + 1;
}
}
Which can be used like this:
var finder = new MaxSequenceFinder();
int result = finder.FindBestCollatzSequence(1, 100);
Console.WriteLine(result);
Running it for a count of 1 000 000 will take some time. But in this case (< 100) the longest sequence is:
97
Create an extension function to find the maximum of a sequence using a function on each element:
public static class Ext {
public static T MaxBy<T, TKey>(this IEnumerable<T> src, Func<T, TKey> key, Comparer<TKey> keyComparer = null) {
keyComparer = keyComparer ?? Comparer<TKey>.Default;
return src.Aggregate((a,b) => keyComparer.Compare(key(a), key(b)) > 0 ? a : b);
}
}
Create a function to return the Collatz length of a number:
public long Collatz(long seq) {
long len = 0;
while (seq != 1) {
if (seq % 2 == 0) {
seq /= 2;
++len;
}
else {
seq = (3 * seq + 1) / 2;
len += 2;
}
}
return len;
}
Then you can use these and LINQ to computer the answer for a range:
var maxn = Enumerable.Range(2, 1000000-2).Select(n => new { n, Collatz = Collatz(n) }).MaxBy(nc => nc.Collatz).n;
It take pretty long time to complete the limit(n) of 100,000.
I suspect that the problem is with the CalculateAmicable() where the number get bigger and it take more time to calculate. What can I change to make it speed up faster than this?
public static void Main (string[] args)
{
CheckAmicable (1, 100000);
}
public static int CheckAmicable(int start, int end)
{
for (int i = start; i < end; i++) {
int main = CalculateAmicable (i); //220
int temp = CalculateAmicable (main); //284
int compare = CalculateAmicable (temp); //220
if (compare == main) {
if (main != temp && temp == i) {
Console.WriteLine (main + " = " + temp + " = " + compare + " i: " + i);
i = compare + 1;
}
}
}
return 0;
}
public static int CalculateAmicable(int number)
{
int total = 0;
for (int i = 1; i < number; i++) {
if (number%i == 0){
total += i;
}
}
return total;
}
Note: English is not my first language!
Yes, CalculateAmicable is inefficient - O(N) complexity - and so you have fo N tests 1e5 * 1e5 == 1e10 - ten billions operations which is slow.
The scheme which reduces complexity to O(log(N)) is
int n = 100000;
//TODO: implement IList<int> GetPrimesUpTo(int) yourself
var primes = GetPrimesUpTo((int)(Math.Sqrt(n + 1) + 1));
// Key - number itself, Value - divisors' sum
var direct = Enumerable
.Range(1, n)
.AsParallel()
.ToDictionary(index => index,
index => GetDivisorsSum(index, primes) - index);
var result = Enumerable
.Range(1, n)
.Where(x => x < direct[x] &&
direct.ContainsKey((int) direct[x]) &&
direct[(int) direct[x]] == x)
.Select(x => $"{x,5}, {direct[x],5}");
Console.Write(string.Join(Environment.NewLine, result));
The outcome is within a second (Core i7 3.2Ghz .Net 4.6 IA-64):
220, 284
1184, 1210
2620, 2924
5020, 5564
6232, 6368
10744, 10856
12285, 14595
17296, 18416
63020, 76084
66928, 66992
67095, 71145
69615, 87633
79750, 88730
Details GetDivisorsSum:
private static long GetDivisorsSum(long value, IList<int> primes) {
HashSet<long> hs = new HashSet<long>();
IList<long> divisors = GetPrimeDivisors(value, primes);
ulong n = (ulong) 1;
n = n << divisors.Count;
long result = 1;
for (ulong i = 1; i < n; ++i) {
ulong v = i;
long p = 1;
for (int j = 0; j < divisors.Count; ++j) {
if ((v % 2) != 0)
p *= divisors[j];
v = v / 2;
}
if (hs.Contains(p))
continue;
result += p;
hs.Add(p);
}
return result;
}
And GetPrimeDivisors:
private static IList<long> GetPrimeDivisors(long value, IList<int> primes) {
List<long> results = new List<long>();
int v = 0;
long threshould = (long) (Math.Sqrt(value) + 1);
for (int i = 0; i < primes.Count; ++i) {
v = primes[i];
if (v > threshould)
break;
if ((value % v) != 0)
continue;
while ((value % v) == 0) {
value = value / v;
results.Add(v);
}
threshould = (long) (Math.Sqrt(value) + 1);
}
if (value > 1)
results.Add(value);
return results;
}
I need to "Find the minimal positive integer not occurring in a given sequence. "
A[0] = 1
A[1] = 3
A[2] = 6
A[3] = 4
A[4] = 1
A[5] = 2, the function should return 5.
Assume that:
N is an integer within the range [1..100,000];
each element of array A is an integer within the range [−2,147,483,648..2,147,483,647].
I wrote the code in codility, but for many cases it did not worked and the performance test gives 0 %. Please help me out, where I am wrong.
class Solution {
public int solution(int[] A) {
if(A.Length ==0) return -1;
int value = A[0];
int min = A.Min();
int max = A.Max();
for (int j = min+1; j < max; j++)
{
if (!A.Contains(j))
{
value = j;
if(value > 0)
{
break;
}
}
}
if(value > 0)
{
return value;
}
else return 1;
}
}
The codility gives error with all except the example, positive and negative only values.
Edit: Added detail to answer your actual question more directly.
"Please help me out, where I am wrong."
In terms of correctness: Consider A = {7,2,5,6,3}. The correct output, given the contents of A, is 1, but our algorithm would fail to detect this since A.Min() would return 2 and we would start looping from 3 onward. In this case, we would return 4 instead; since it's the next missing value.
Same goes for something like A = {14,15,13}. The minimal missing positive integer here is again 1 and, since all the values from 13-15 are present, the value variable will retain its initial value of value=A[0] which would be 14.
In terms of performance: Consider what A.Min(), A.Max() and A.Contains() are doing behind the scenes; each one of these is looping through A in its entirety and in the case of Contains, we are calling it repeatedly for every value between the Min() and the lowest positive integer we can find. This will take us far beyond the specified O(N) performance that Codility is looking for.
By contrast, here's the simplest version I can think of that should score 100% on Codility. Notice that we only loop through A once and that we take advantage of a Dictionary which lets us use ContainsKey; a much faster method that does not require looping through the whole collection to find a value.
using System;
using System.Collections.Generic;
class Solution {
public int solution(int[] A) {
// the minimum possible answer is 1
int result = 1;
// let's keep track of what we find
Dictionary<int,bool> found = new Dictionary<int,bool>();
// loop through the given array
for(int i=0;i<A.Length;i++) {
// if we have a positive integer that we haven't found before
if(A[i] > 0 && !found.ContainsKey(A[i])) {
// record the fact that we found it
found.Add(A[i], true);
}
}
// crawl through what we found starting at 1
while(found.ContainsKey(result)) {
// look for the next number
result++;
}
// return the smallest positive number that we couldn't find.
return result;
}
}
The simplest solution that scored perfect score was:
public int solution(int[] A)
{
int flag = 1;
A = A.OrderBy(x => x).ToArray();
for (int i = 0; i < A.Length; i++)
{
if (A[i] <= 0)
continue;
else if (A[i] == flag)
{
flag++;
}
}
return flag;
}
Fastest C# solution so far for [-1,000,000...1,000,000].
public int solution(int[] array)
{
HashSet<int> found = new HashSet<int>();
for (int i = 0; i < array.Length; i++)
{
if (array[i] > 0)
{
found.Add(array[i]);
}
}
int result = 1;
while (found.Contains(result))
{
result++;
}
return result;
}
A tiny version of another 100% with C#
using System.Linq;
class Solution
{
public int solution(int[] A)
{
// write your code in C# 6.0 with .NET 4.5 (Mono)
var i = 0;
return A.Where(a => a > 0).Distinct().OrderBy(a => a).Any(a => a != (i = i + 1)) ? i : i + 1;
}
}
A simple solution that scored 100% with C#
int Solution(int[] A)
{
var A2 = Enumerable.Range(1, A.Length + 1);
return A2.Except(A).First();
}
public class Solution {
public int solution( int[] A ) {
return Arrays.stream( A )
.filter( n -> n > 0 )
.sorted()
.reduce( 0, ( a, b ) -> ( ( b - a ) > 1 ) ? a : b ) + 1;
}
}
It seemed easiest to just filter out the negative numbers. Then sort the stream. And then reduce it to come to an answer. It's a bit of a functional approach, but it got a 100/100 test score.
Got an 100% score with this solution:
https://app.codility.com/demo/results/trainingUFKJSB-T8P/
public int MissingInteger(int[] A)
{
A = A.Where(a => a > 0).Distinct().OrderBy(c => c).ToArray();
if (A.Length== 0)
{
return 1;
}
for (int i = 0; i < A.Length; i++)
{
//Console.WriteLine(i + "=>" + A[i]);
if (i + 1 != A[i])
{
return i + 1;
}
}
return A.Max() + 1;
}
JavaScript solution using Hash Table with O(n) time complexity.
function solution(A) {
let hashTable = {}
for (let item of A) {
hashTable[item] = true
}
let answer = 1
while(true) {
if(!hashTable[answer]) {
return answer
}
answer++
}
}
The Simplest solution for C# would be:
int value = 1;
int min = A.Min();
int max = A.Max();
if (A.Length == 0) return value = 1;
if (min < 0 && max < 0) return value = 1;
List<int> range = Enumerable.Range(1, max).ToList();
List<int> current = A.ToList();
List<int> valid = range.Except(current).ToList();
if (valid.Count() == 0)
{
max++;
return value = max;
}
else
{
return value = valid.Min();
}
Considering that the array should start from 1 or if it needs to start from the minimum value than the Enumerable.range should start from Min
MissingInteger solution in C
int solution(int A[], int N) {
int i=0,r[N];
memset(r,0,(sizeof(r)));
for(i=0;i<N;i++)
{
if(( A[i] > 0) && (A[i] <= N)) r[A[i]-1]=A[i];
}
for(i=0;i<N;i++)
{
if( r[i] != (i+1)) return (i+1);
}
return (N+1);
}
My solution for it:
public static int solution()
{
var A = new[] { -1000000, 1000000 }; // You can try with different integers
A = A.OrderBy(i => i).ToArray(); // We sort the array first
if (A.Length == 1) // if there is only one item in the array
{
if (A[0]<0 || A[0] > 1)
return 1;
if (A[0] == 1)
return 2;
}
else // if there are more than one item in the array
{
for (var i = 0; i < A.Length - 1; i++)
{
if (A[i] >= 1000000) continue; // if it's bigger than 1M
if (A[i] < 0 || (A[i] + 1) >= (A[i + 1])) continue; //if it's smaller than 0, if the next integer is bigger or equal to next integer in the sequence continue searching.
if (1 < A[0]) return 1;
return A[i] + 1;
}
}
if (1 < A[0] || A[A.Length - 1] + 1 == 0 || A[A.Length - 1] + 1 > 1000000)
return 1;
return A[A.Length-1] +1;
}
class Solution {
public int solution(int[] A) {
int size=A.length;
int small,big,temp;
for (int i=0;i<size;i++){
for(int j=0;j<size;j++){
if(A[i]<A[j]){
temp=A[j];
A[j]=A[i];
A[i]=temp;
}
}
}
int z=1;
for(int i=0;i<size;i++){
if(z==A[i]){
z++;
}
//System.out.println(a[i]);
}
return z;
}
enter code here
}
In C# you can solve the problem by making use of built in library functions. How ever the performance is low for very large integers
public int solution(int[] A)
{
var numbers = Enumerable.Range(1, Math.Abs(A.Max())+1).ToArray();
return numbers.Except(A).ToArray()[0];
}
Let me know if you find a better solution performance wise
C# - MissingInteger
Find the smallest missing integer between 1 - 1000.000.
Assumptions of the OP take place
TaskScore/Correctness/Performance: 100%
using System;
using System.Linq;
namespace TestConsole
{
class Program
{
static void Main(string[] args)
{
var A = new int[] { -122, -5, 1, 2, 3, 4, 5, 6, 7 }; // 8
var B = new int[] { 1, 3, 6, 4, 1, 2 }; // 5
var C = new int[] { -1, -3 }; // 1
var D = new int[] { -3 }; // 1
var E = new int[] { 1 }; // 2
var F = new int[] { 1000000 }; // 1
var x = new int[][] { A, B, C, D, E, F };
x.ToList().ForEach((arr) =>
{
var s = new Solution();
Console.WriteLine(s.solution(arr));
});
Console.ReadLine();
}
}
// ANSWER/SOLUTION
class Solution
{
public int solution(int[] A)
{
// clean up array for negatives and duplicates, do sort
A = A.Where(entry => entry > 0).Distinct().OrderBy(it => it).ToArray();
int lowest = 1, aLength = A.Length, highestIndex = aLength - 1;
for (int i = 0; i < aLength; i++)
{
var currInt = A[i];
if (currInt > lowest) return lowest;
if (i == highestIndex) return ++lowest;
lowest++;
}
return 1;
}
}
}
Got 100% - C# Efficient Solution
public int solution (int [] A){
int len = A.Length;
HashSet<int> realSet = new HashSet<int>();
HashSet<int> perfectSet = new HashSet<int>();
int i = 0;
while ( i < len)
{
realSet.Add(A[i]); //convert array to set to get rid of duplicates, order int's
perfectSet.Add(i + 1); //create perfect set so can find missing int
i++;
}
perfectSet.Add(i + 1);
if (realSet.All(item => item < 0))
return 1;
int notContains =
perfectSet.Except(realSet).Where(item=>item!=0).FirstOrDefault();
return notContains;
}
class Solution {
public int solution(int[] a) {
int smallestPositive = 1;
while(a.Contains(smallestPositive)) {
smallestPositive++;
}
return smallestPositive;
}
}
Well, this is a new winner now. At least on C# and my laptop. It's 1.5-2 times faster than the previous champion and 3-10 times faster, than most of the other solutions. The feature (or a bug?) of this solution is that it uses only basic data types. Also 100/100 on Codility.
public int Solution(int[] A)
{
bool[] B = new bool[(A.Length + 1)];
for (int i = 0; i < A.Length; i++)
{
if ((A[i] > 0) && (A[i] <= A.Length))
B[A[i]] = true;
}
for (int i = 1; i < B.Length; i++)
{
if (!B[i])
return i;
}
return A.Length + 1;
}
Simple C++ solution. No additional memory need, time execution order O(N*log(N)):
int solution(vector<int> &A) {
sort (A.begin(), A.end());
int prev = 0; // the biggest integer greater than 0 found until now
for( auto it = std::begin(A); it != std::end(A); it++ ) {
if( *it > prev+1 ) break;// gap found
if( *it > 0 ) prev = *it; // ignore integers smaller than 1
}
return prev+1;
}
int[] A = {1, 3, 6, 4, 1, 2};
Set<Integer> integers = new TreeSet<>();
for (int i = 0; i < A.length; i++) {
if (A[i] > 0) {
integers.add(A[i]);
}
}
Integer[] arr = integers.toArray(new Integer[0]);
final int[] result = {Integer.MAX_VALUE};
final int[] prev = {0};
final int[] curr2 = {1};
integers.stream().forEach(integer -> {
if (prev[0] + curr2[0] == integer) {
prev[0] = integer;
} else {
result[0] = prev[0] + curr2[0];
}
});
if (Integer.MAX_VALUE == result[0]) result[0] = arr[arr.length-1] + 1;
System.out.println(result[0]);
I was surprised but this was a good lesson. LINQ IS SLOW. my answer below got me 11%
public int solution (int [] A){
if (Array.FindAll(A, x => x >= 0).Length == 0) {
return 1;
} else {
var lowestValue = A.Where(x => Array.IndexOf(A, (x+1)) == -1).Min();
return lowestValue + 1;
}
}
I think I kinda look at this a bit differently but gets a 100% evaluation. Also, I used no library:
public static int Solution(int[] A)
{
var arrPos = new int[1_000_001];
for (int i = 0; i < A.Length; i++)
{
if (A[i] >= 0)
arrPos[A[i]] = 1;
}
for (int i = 1; i < arrPos.Length; i++)
{
if (arrPos[i] == 0)
return i;
}
return 1;
}
public int solution(int[] A) {
// write your code in Java SE 8
Set<Integer> elements = new TreeSet<Integer>();
long lookFor = 1;
for (int i = 0; i < A.length; i++) {
elements.add(A[i]);
}
for (Integer integer : elements) {
if (integer == lookFor)
lookFor += 1;
}
return (int) lookFor;
}
I tried to use recursion in C# instead of sorting, because I thought it would show more coding skill to do it that way, but on the scaling tests it didn't preform well on large performance tests. Suppose it's best to just do the easy way.
class Solution {
public int lowest=1;
public int solution(int[] A) {
// write your code in C# 6.0 with .NET 4.5 (Mono)
if (A.Length < 1)
return 1;
for (int i=0; i < A.Length; i++){
if (A[i]==lowest){
lowest++;
solution(A);
}
}
return lowest;
}
}
Here is my solution in javascript
function solution(A) {
// write your code in JavaScript (Node.js 8.9.4)
let result = 1;
let haveFound = {}
let len = A.length
for (let i=0;i<len;i++) {
haveFound[`${A[i]}`] = true
}
while(haveFound[`${result}`]) {
result++
}
return result
}
class Solution {
public int solution(int[] A) {
var sortedList = A.Where(x => x > 0).Distinct().OrderBy(x => x).ToArray();
var output = 1;
for (int i = 0; i < sortedList.Length; i++)
{
if (sortedList[i] != output)
{
return output;
}
output++;
}
return output;
}
}
You should just use a HashSet as its look up time is also constant instead of a dictionary. The code is less and cleaner.
public int solution (int [] A){
int answer = 1;
var set = new HashSet<int>(A);
while (set.Contains(answer)){
answer++;
}
return answer;
}
This snippet should work correctly.
using System;
using System.Collections.Generic;
public class Program
{
public static void Main()
{
int result = 1;
List<int> lst = new List<int>();
lst.Add(1);
lst.Add(2);
lst.Add(3);
lst.Add(18);
lst.Add(4);
lst.Add(1000);
lst.Add(-1);
lst.Add(-1000);
lst.Sort();
foreach(int curVal in lst)
{
if(curVal <=0)
result=1;
else if(!lst.Contains(curVal+1))
{
result = curVal + 1 ;
}
Console.WriteLine(result);
}
}
}