Related
As a learning exercise, I am writing a library to evaluate the complexity of an algorithm. I do this by seeing how long the alorgorithm takes for given inputs N, and then do a polynomial regression to see if the algorithm is best fitted by N, N^2, log(N), etc. I wrote Unit Test cases and they seem to work for N^2 and LogN. It's the simplest case, N that is giving me grief. For an order N algorithm, I'm using the following:
uint LinearAlgorithm2(uint n)
{
uint returnValue = 7;
for (uint i = 0; i < n; i++)
{
//Thread.Sleep(2);
double y = _randomNumber.NextDouble(); // dummy calculation
if (y < 0.0005)
{
returnValue = 1;
//Console.WriteLine("y " + y + i);
}
else if (y < .05)
{
returnValue = 2;
}
else if (y < .5)
{
returnValue = 3;
}
else
{
returnValue = 7;
}
}
return returnValue;
}
I have all that nonsense code in there simply because I was concerned that the compiler might have been optimizing my loop away. In any case I think the loop is just a simple loop from 0 to n and therefore this is an algorithm or order N.
My unit test code is:
public void TestLinearAlgorithm2()
{
Evaluator evaluator = new Evaluator();
var result = evaluator.Evaluate(LinearAlgorithm2, new List<double>() { 1000,1021, 1065, 1300, 1423, 1599,
1683, 1691, 1692, 1696, 1699, 1705,1709, 1712, 1713, 1717, 1720,
1722, 1822, 2000, 2050, 2090, 2500, 2666, 2700,2701, 2767, 2799, 2822, 2877,
3000, 3100, 3109, 3112, 3117, 3200, 3211, 3216, 3219, 3232, 3500, 3666, 3777,
4000, 4022, 4089, 4122, 4199, 4202, 4222, 5000 });
var minKey = result.Aggregate((l, r) => l.Value < r.Value ? l : r).Key;
Assert.IsTrue(minKey.ToString() == FunctionEnum.N.ToString());
}
And I put the class Evaluator down below. Perhaps before staring at that though I'd ask
1) Do you agree a simple loop 0 to N should be of order N for complexity? I.e. the time to complete the algorithm goes up as n (not nLogn or n^3, etc.)
2) Is there some library code already written to evaluate algorithmic complexity?
3) I'm very suspicious that the problem is one of optimization. Under ProjectSettings->Build in Visual Studio, I have unchecked "Optimize Code". What else should I be doing? One reason I'm suspicious that the compiler is skewing the results is that I print out the times for various input values of n. For 1000 (the first entry) it's 2533, but for 1021 it's only 415! I've put all the results below the Evaluator class.
Thanks for any ideas and let me know if I can provide more info (Github link?) -Dave
Code for Evaluator.cs
using MathNet.Numerics.LinearAlgebra;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
/// <summary>
/// Library to evaluate complexity of algorithm.
/// Pass in method and necessary data
/// There are methods to set the size of the test data
///
/// Evaluate for LogN, N, NLogN, N^2, N^3, 2^N
///
/// Should be able use ideas from
/// https://en.wikipedia.org/wiki/Polynomial_regression
/// to finish problem. Next need matrix multiplication.
/// Or possibly use this:
/// https://www.codeproject.com/Articles/19032/C-Matrix-Library
/// or similar
/// </summary>
namespace BigOEstimator
{
public enum FunctionEnum
{
Constant = 0,
LogN = 1,
N,
NLogN,
NSquared,
NCubed,
TwoToTheN
}
public class Evaluator
{
//private List<uint> _suggestedList = new List<uint>();
private Dictionary<FunctionEnum, double> _results = new Dictionary<FunctionEnum, double>();
public Evaluator()
{
}
public Dictionary<FunctionEnum, double> Evaluate(Func<uint,uint> algorithm, IList<double> suggestedList)
{
Dictionary<FunctionEnum, double> results = new Dictionary<FunctionEnum, double>();
Vector<double> answer = Vector<double>.Build.Dense(suggestedList.Count(), 0.0);
for (int i = 0; i < suggestedList.Count(); i++)
{
Stopwatch stopwatch = new Stopwatch();
stopwatch.Start();
var result = algorithm((uint) suggestedList[i]);
stopwatch.Stop();
answer[i] = stopwatch.ElapsedTicks;
Console.WriteLine($"Answer for index {suggestedList[i]} is {answer[i]}");
}
// linear case - N
results[FunctionEnum.N] = CalculateResidual(Vector<double>.Build.DenseOfEnumerable(suggestedList), answer, d => d);
// quadratic case - NSquared
results[FunctionEnum.NSquared] = CalculateResidual(Vector<double>.Build.DenseOfEnumerable(suggestedList), answer, d => (d*d));
// cubic case - NCubed
results[FunctionEnum.NCubed] = CalculateResidual(Vector<double>.Build.DenseOfEnumerable(suggestedList), answer, d => (d * d * d));
// NLogN case - NLogN
results[FunctionEnum.NLogN] = CalculateResidual(Vector<double>.Build.DenseOfEnumerable(suggestedList), answer, d => (d * Math.Log(d)));
// LogN case - LogN
results[FunctionEnum.LogN] = CalculateResidual(Vector<double>.Build.DenseOfEnumerable(suggestedList), answer, d => ( Math.Log(d)));
// following few lines are useful for unit tests. You get this by hitting 'Output' on test!
var minKey = results.Aggregate((l, r) => l.Value < r.Value ? l : r).Key;
Console.WriteLine("Minimum Value: Key: " + minKey.ToString() + ", Value: " + results[minKey]);
foreach (var item in results)
{
Console.WriteLine("Test: " + item.Key + ", result: " + item.Value);
}
return results;
}
private double CalculateResidual(Vector<double> actualXs, Vector<double> actualYs, Func<double, double> transform)
{
Matrix<double> m = Matrix<double>.Build.Dense(actualXs.Count, 2, 0.0);
for (int i = 0; i < m.RowCount; i++)
{
m[i, 0] = 1.0;
m[i, 1] = transform((double)actualXs[i]);
}
Vector<double> betas = CalculateBetas(m, actualYs);
Vector<double> estimatedYs = CalculateEstimatedYs(m, betas);
return CalculatateSumOfResidualsSquared(actualYs, estimatedYs);
}
private double CalculateLinearResidual(Vector<double> actualXs, Vector<double> actualYs)
{
Matrix<double> m = Matrix<double>.Build.Dense(actualXs.Count, 2, 0.0);
for (int i = 0; i < m.RowCount; i++)
{
m[i, 0] = 1.0;
m[i, 1] = (double)actualXs[i];
}
Vector<double> betas = CalculateBetas(m, actualYs);
Vector<double> estimatedYs = CalculateEstimatedYs(m, betas);
return CalculatateSumOfResidualsSquared(actualYs, estimatedYs);
}
private Vector<double> CalculateBetas(Matrix<double> m, Vector<double> y)
{
return (m.Transpose() * m).Inverse() * m.Transpose() * y;
}
private Vector<double> CalculateEstimatedYs(Matrix<double> x, Vector<double> beta)
{
return x * beta;
}
private double CalculatateSumOfResidualsSquared(Vector<double> yReal, Vector<double> yEstimated)
{
return ((yReal - yEstimated).PointwisePower(2)).Sum();
}
}
}
Results of one run of unit test (notice discrepancies such as first one!):
Answer for index 1000 is 2533
Answer for index 1021 is 415
Answer for index 1065 is 375
Answer for index 1300 is 450
Answer for index 1423 is 494
Answer for index 1599 is 566
Answer for index 1683 is 427
Answer for index 1691 is 419
Answer for index 1692 is 413
Answer for index 1696 is 420
Answer for index 1699 is 420
Answer for index 1705 is 438
Answer for index 1709 is 595
Answer for index 1712 is 588
Answer for index 1713 is 426
Answer for index 1717 is 433
Answer for index 1720 is 421
Answer for index 1722 is 428
Answer for index 1822 is 453
Answer for index 2000 is 497
Answer for index 2050 is 518
Answer for index 2090 is 509
Answer for index 2500 is 617
Answer for index 2666 is 653
Answer for index 2700 is 673
Answer for index 2701 is 671
Answer for index 2767 is 690
Answer for index 2799 is 685
Answer for index 2822 is 723
Answer for index 2877 is 714
Answer for index 3000 is 746
Answer for index 3100 is 753
Answer for index 3109 is 754
Answer for index 3112 is 763
Answer for index 3117 is 2024
Answer for index 3200 is 772
Answer for index 3211 is 821
Answer for index 3216 is 802
Answer for index 3219 is 788
Answer for index 3232 is 775
Answer for index 3500 is 848
Answer for index 3666 is 896
Answer for index 3777 is 917
Answer for index 4000 is 976
Answer for index 4022 is 972
Answer for index 4089 is 1145
Answer for index 4122 is 1047
Answer for index 4199 is 1031
Answer for index 4202 is 1033
Answer for index 4222 is 1151
Answer for index 5000 is 1588
Minimum Value: Key: NCubed, Value: 5895501.06936747
Test: N, result: 6386524.27502984
Test: NSquared, result: 6024667.62732316
Test: NCubed, result: 5895501.06936747
Test: NLogN, result: 6332154.89282043
Test: LogN, result: 6969133.89207915
I suspect your root issue here is that the runtime for each individual iteration is so low that other factors outside of your control (thread scheduling, cache misses, etc.) are causing significant per-operation variance and dominating the execution time. For a true N^3 algorithm, a relatively small number of N can still produce a reasonably large number of 'cycles', meaning that the variation in the cost of the operation has a chance to average out. For things that are straight O(N) or even O(log(N)), the individual operation variance becomes an issue for smaller N.
To get around this, you need to run the efficient algorithms for more iterations, to give these other effects time to average out. This may mean having to evaluate your initial results at low N and scaling it at a different rate if you see that it's not taking enough time to be meaningful. You'll probably want to scale into the range of taking entire seconds to get good averaging, but you'll have to experiment to determine how much variance still occurs.
The compiler Opimisations are there to make distinct parts of the code faster:
cutting underused temporary variables
adding temporary variables, to avoid having repetitive Array Indexer calls
Aside for accidentally causing race conditions by cutting a temporary variable too much (volatile prevents that), there is nothing I know of them making worse.
Making a N to N^3? Pretty improbable result. More likely you wrote a N^3 by accident and just sometimes the Compiler or the values align to salvage it down to N. There is a reason we programmers leave developing those algorithms to Mathematicians.
One problem is actually measuring the stuff:
Nevermind the the Compiler Optimisation, the Garbage Collector can throw all your measurements into chaos.
Every single string you write, is a class instance. One that has to be created, possibly interned and Garbage Collected.
Also outputting those strings costs massive amounts of time too. It is fairly easy to write code so fast, that the Console.WriteLine() becomes the bottleneck. I regularly run into that issue with robocopy on small files.
The second issue is that LinearAlgorithm2 does not have a linear speed. Each if case it skips, increase the runtime of that loop accordingly. Given that NextDouble()gives you a number between 0.0 and 1.0, it getting to the 0.5 or else case is literal orders of magnitude more likely.
I am also confused why you even started do deal with floats. They are hard to figure out and should be assumed to be non-deterministic case too.
Those are just the issues I could notice. Which should not be close to all of them.
I am very unskilled in programming and I am trying to finish this task for my class. I've looked all over the Internet but still can't find the answer.
Here I have a piece of my code which prints out letters and the number of times it was spotted in my text file:
for (int i = 0; i < (int)char.MaxValue; i++)
{
if (c[i] > 0 &&
char.IsLetter((char)i))
{
Console.WriteLine("Letter: {0} Frequency: {1}",
(char)i,
c[i]);
}
I've calculated the number of letters in my code using int count = s.Count(char.IsLetter);
Dividing the c[i], obviously, doesn't work. I've tried several other methods but I keep getting errors. I feel like the solution is very simple but I simply can't see it. I hope that you will be willing to help me out :)
You could use a dictionary to store the frequency of each letter. You also shouldn't loop with the constraint i < (int)char.MaxValue. This will put you out of bounds unless c's length is >= char.MaxValue.
var frequency = new Dictionary<char, int>();
for (var i = 0; i < c.Length; i++)
{
var current = (char)c[i];
if (current > 0 && char.IsLetter(current))
{
if (!frequency.ContainsKey(current))
frequency.Add(current);
frequency[current]++;
Console.WriteLine("Letter: {0} Frequency: {1}", current, frequency[current]);
}
}
Maybe you have an integer division when you want a floating point division? In that case, cast either the dividend or the divisor to double (the other one will be converted automatically), for example:
(double)c[i] / count
Edit: Since you write percentage, if you need to multiply by one hundred, you can also make sure that literal is a double, then if you are careful with the precedence of the operators, you can have all casts implicit. Example:
Console.WriteLine($"Letter: {(char)i} Count: {c[i]} Percentage {c[i] * 100.0 / count}");
The multiplication goes first because of left-associativity. The literal 100.0 has type double.
Okay, so this is my challenge taken from CodeEval. I have to read numbers from a file that is formatted in a standard way, it has a pair of numbers separated by a comma on each line (x, n). I have to read in the pair values and process them, then print out the smallest multiple of n which is greater than or equal to x, where n is a power of 2.
EXACT REQUIREMENT: Given numbers x and n, where n is a power of 2, print out the smallest multiple of n which is greater than or equal to x. Do not use division or modulo operator.
I have come up with a number of solutions, but none of them satisfy the computer's conditions to let me pass the challenge. I only get a partial completion with scores that vary from 30 to 80 (from 100).
I'm assuming that my solutions do not pass the speed but more likely the memory-usage requirements.
I would greatly appreciate it if anyone can enlighten me and offer some better, more efficient solutions.
Here are two of my solutions:
var filePath = #"C:\Users\myfile.txt";
int x;
int n;
using (var reader = new StreamReader(filePath))
{
string numsFile = string.Empty;
while ((numsFile = reader.ReadLine()) != null)
{
var nums = numsFile.Split(',').ToArray();
x = int.Parse(nums[0]);
n = int.Parse(nums[1]);
Console.WriteLine(DangleNumbers(x, n));
}
}
<<<>>>
var fileNums = File.ReadAllLines(filePath);
foreach (var line in fileNums)
{
var nums = line.Split(',').ToArray();
x = int.Parse(nums[0]);
n = int.Parse(nums[1]);
Console.WriteLine(DangleNumbers(x, n));
}
Method to check numbers
public static int DangleNumbers(int x, int n)
{
int m = 2;
while ((n * m) < x)
{
m += 2;
}
return m * n;
}
I'm fairly new to C# and programming but these two ways I found to get the best score from several others I have tried. I'm thinking that it's not too optimal for a new string to be created on each iteration, nor do I know how to use a StringBuilder and get the values into an Int from it.
Any pointers in the right direction would be appreciated as I would really like to get this challenge passed.
The smallest multiple of n that is larger or equal to x is likely this:
if(x <= n)
{
return n;
}
else
{
return x % n == 0 ? x : (x/n + 1) * n;
}
As x and n are integers, the result of x/n will be truncated (or effectively rounded down). So the next integer larger than x that is a multiple of n is (x/n + 1) * n
Since you missed the requirements, the modulo version was the most obvious choice. Though you still got your method wrong. m = 2 would not result in the smallest being returned but it could actually be the double of the smallest if n is already larger than x.
x = 7, n = 8 would get you 16 instead of 8.
Also adding 2 to m would result in a similar problem.
x = 5, n = 2 would get you 8 instead of 6.
use the following method instead:
public static int DangleNumbers(int x, int n)
{
int result = n;
while(result < x)
result += n;
return result;
}
Still capable of begin optimized but at least right according to the (now) stated constraints.
I have tried to improve the solution with some suggestions from you guys and take the variables outside the loop and drop the ToArray() call which was redundant.
static void Main(string[] args)
{
var filePath = #"C:\Users\sorin\Desktop\sorvas.txt";
int x;
int n;
string[] nums;
using (var reader = new StreamReader(filePath))
{
string numsFile = string.Empty;
while ((numsFile = reader.ReadLine()) != null)
{
nums = numsFile.Split(',');
x = int.Parse(nums[0]);
n = int.Parse(nums[1]);
Console.WriteLine(DangleNumbers(x, n));
}
}
}
public static int DangleNumbers(int x, int n)
{
int m = 2;
while ((n * m) < x)
{
m += 2;
}
return m * n;
}
So it looks like this. The thing is that even if now the numbers have slightly improved, I got a lower score.
May it be their system to blame ?
Using the first option of reading line by line (rather than reading all lines) is clearly going to use less memory (except potentially in the case where the file is very small (eg "1,1") in which case the overhead of the reader may cause problems but at that point the memory used is probably irrelevant.
Likewise declaring the variables outside the loop is generally better but in this case since the objects are value types I'm not sure it makes a difference.
Lastly the most efficient way of doing your DangleNumbers method is probably using bitwise logic operators and the fact that n is always a power of 2. Here is my attempt:
public static int DangleNumbers3(int x, int n)
{
return ((x-1) & ~(n-1))+n;
}
Essentially it relies on the fact that in binary a power of n is always a 1 followed by zero or more zeros. Thus a multiple of n will always end in that same number of zeros. So if n has M zeros after the one then you can take the binary form of x and if it already ends in M zeros then you have your answer. Otherwise you zero out the last M digits at which point you have the multiple of n that is just under x and then you add 1.
In the code ~(n-1) is a bitmask that has M zeros at the end and the leading digits are all 1. Thus when you AND it with a number it will zero out the trailing digits. I apply this to (x-1) to avoid having to do the check for if it is already the answer and have special cases.
It is important to note that this only works because of the special form of n as a power of 2. This method avoids the need for any loops and thus should run much faster (it has five operations total and no branching at all compared to other looping methods which will tend to have at the very least an operation and a comparison per loop.
This is a problem from Project Euler, and this question includes some source code, so consider this your spoiler alert, in case you are interested in solving it yourself. It is discouraged to distribute solutions to the problems, and that isn't what I want. I just need a little nudge and guidance in the right direction, in good faith.
The problem reads as follows:
2^15 = 32768 and the sum of its digits is 3 + 2 + 7 + 6 + 8 = 26.
What is the sum of the digits of the number 2^1000?
I understand the premise and math of the problem, but I've only started practicing C# a week ago, so my programming is shaky at best.
I know that int, long and double are hopelessly inadequate for holding the 300+ (base 10) digits of 2^1000 precisely, so some strategy is needed. My strategy was to set a calculation which gets the digits one by one, and hope that the compiler could figure out how to calculate each digit without some error like overflow:
using System;
using System.IO;
using System.Windows.Forms;
namespace euler016
{
class DigitSum
{
// sum all the (base 10) digits of 2^powerOfTwo
[STAThread]
static void Main(string[] args)
{
int powerOfTwo = 1000;
int sum = 0;
// iterate through each (base 10) digit of 2^powerOfTwo, from right to left
for (int digit = 0; Math.Pow(10, digit) < Math.Pow(2, powerOfTwo); digit++)
{
// add next rightmost digit to sum
sum += (int)((Math.Pow(2, powerOfTwo) / Math.Pow(10, digit) % 10));
}
// write output to console, and save solution to clipboard
Console.Write("Power of two: {0} Sum of digits: {1}\n", powerOfTwo, sum);
Clipboard.SetText(sum.ToString());
Console.WriteLine("Answer copied to clipboard. Press any key to exit.");
Console.ReadKey();
}
}
}
It seems to work perfectly for powerOfTwo < 34. My calculator ran out of significant digits above that, so I couldn't test higher powers. But tracing the program, it looks like no overflow is occurring: the number of digits calculated gradually increases as powerOfTwo = 1000 increases, and the sum of digits also (on average) increases with increasing powerOfTwo.
For the actual calculation I am supposed to perform, I get the output:
Power of two: 1000 Sum of digits: 1189
But 1189 isn't the right answer. What is wrong with my program? I am open to any and all constructive criticisms.
For calculating the values of such big numbers you not only need to be a good programmer but also a good mathematician. Here is a hint for you,
there's familiar formula ax = ex ln a , or if you prefer, ax = 10x log a.
More specific to your problem
21000 Find the common (base 10) log of 2, and multiply it by 1000; this is the power of 10. If you get something like 1053.142 (53.142 = log 2 value * 1000) - which you most likely will - then that is 1053 x 100.142; just evaluate 100.142 and you will get a number between 1 and 10; and multiply that by 1053, But this 1053 will not be useful as 53 zero sum will be zero only.
For log calculation in C#
Math.Log(num, base);
For more accuracy you can use, Log and Pow function of Big Integer.
Now rest programming help I believe you can have from your side.
Normal int can't help you with such a large number. Not even long. They are never designed to handle numbers such huge. int can store around 10 digits (exact max: 2,147,483,647) and long for around 19 digits (exact max: 9,223,372,036,854,775,807). However, A quick calculation from built-in Windows calculator tells me 2^1000 is a number of more than 300 digits.
(side note: the exact value can be obtained from int.MAX_VALUE and long.MAX_VALUE respectively)
As you want precise sum of digits, even float or double types won't work because they only store significant digits for few to some tens of digits. (7 digit for float, 15-16 digits for double). Read here for more information about floating point representation, double precision
However, C# provides a built-in arithmetic
BigInteger for arbitrary precision, which should suit your (testing) needs. i.e. can do arithmetic in any number of digits (Theoretically of course. In practice it is limited by memory of your physical machine really, and takes time too depending on your CPU power)
Back to your code, I think the problem is here
Math.Pow(2, powerOfTwo)
This overflows the calculation. Well, not really, but it is the double precision is not precisely representing the actual value of the result, as I said.
A solution without using the BigInteger class is to store each digit in it's own int and then do the multiplication manually.
static void Problem16()
{
int[] digits = new int[350];
//we're doing multiplication so start with a value of 1
digits[0] = 1;
//2^1000 so we'll be multiplying 1000 times
for (int i = 0; i < 1000; i++)
{
//run down the entire array multiplying each digit by 2
for (int j = digits.Length - 2; j >= 0; j--)
{
//multiply
digits[j] *= 2;
//carry
digits[j + 1] += digits[j] / 10;
//reduce
digits[j] %= 10;
}
}
//now just collect the result
long result = 0;
for (int i = 0; i < digits.Length; i++)
{
result += digits[i];
}
Console.WriteLine(result);
Console.ReadKey();
}
I used bitwise shifting to left. Then converting to array and summing its elements. My end result is 1366, Do not forget to add reference to System.Numerics;
BigInteger i = 1;
i = i << 1000;
char[] myBigInt = i.ToString().ToCharArray();
long sum = long.Parse(myBigInt[0].ToString());
for (int a = 0; a < myBigInt.Length - 1; a++)
{
sum += long.Parse(myBigInt[a + 1].ToString());
}
Console.WriteLine(sum);
since the question is c# specific using a bigInt might do the job. in java and python too it works but in languages like c and c++ where the facility is not available you have to take a array and do multiplication. take a big digit in array and multiply it with 2. that would be simple and will help in improving your logical skill. and coming to project Euler. there is a problem in which you have to find 100! you might want to apply the same logic for that too.
Try using BigInteger type , 2^100 will end up to a a very large number for even double to handle.
BigInteger bi= new BigInteger("2");
bi=bi.pow(1000);
// System.out.println("Val:"+bi.toString());
String stringArr[]=bi.toString().split("");
int sum=0;
for (String string : stringArr)
{ if(!string.isEmpty()) sum+=Integer.parseInt(string); }
System.out.println("Sum:"+sum);
------------------------------------------------------------------------
output :=> Sum:1366
Here's my solution in JavaScript
(function (exponent) {
const num = BigInt(Math.pow(2, exponent))
let arr = num.toString().split('')
arr.slice(arr.length - 1)
const result = arr.reduce((r,c)=> parseInt(r)+parseInt(c))
console.log(result)
})(1000)
This is not a serious answer—just an observation.
Although it is a good challenge to try to beat Project Euler using only one programming language, I believe the site aims to further the horizons of all programmers who attempt it. In other words, consider using a different programming language.
A Common Lisp solution to the problem could be as simple as
(defun sum_digits (x)
(if (= x 0)
0
(+ (mod x 10) (sum_digits (truncate (/ x 10))))))
(print (sum_digits (expt 2 1000)))
main()
{
char c[60];
int k=0;
while(k<=59)
{
c[k]='0';
k++;
}
c[59]='2';
int n=1;
while(n<=999)
{
k=0;
while(k<=59)
{
c[k]=(c[k]*2)-48;
k++;
}
k=0;
while(k<=59)
{
if(c[k]>57){ c[k-1]+=1;c[k]-=10; }
k++;
}
if(c[0]>57)
{
k=0;
while(k<=59)
{
c[k]=c[k]/2;
k++;
}
printf("%s",c);
exit(0);
}
n++;
}
printf("%s",c);
}
Python makes it very simple to compute this with an oneliner:
print sum(int(digit) for digit in str(2**1000))
or alternatively with map:
print sum(map(int,str(2**1000)))
I'm trying to refactor this algorithm to make it faster. What would be the first refactoring here for speed?
public int GetHowManyFactors(int numberToCheck)
{
// we know 1 is a factor and the numberToCheck
int factorCount = 2;
// start from 2 as we know 1 is a factor, and less than as numberToCheck is a factor
for (int i = 2; i < numberToCheck; i++)
{
if (numberToCheck % i == 0)
factorCount++;
}
return factorCount;
}
The first optimization you could make is that you only need to check up to the square root of the number. This is because factors come in pairs where one is less than the square root and the other is greater.
One exception to this is if n is an exact square then its square root is a factor of n but not part of a pair.
For example if your number is 30 the factors are in these pairs:
1 x 30
2 x 15
3 x 10
5 x 6
So you don't need to check any numbers higher than 5 because all the other factors can already be deduced to exist once you find the corresponding small factor in the pair.
Here is one way to do it in C#:
public int GetFactorCount(int numberToCheck)
{
int factorCount = 0;
int sqrt = (int)Math.Ceiling(Math.Sqrt(numberToCheck));
// Start from 1 as we want our method to also work when numberToCheck is 0 or 1.
for (int i = 1; i < sqrt; i++)
{
if (numberToCheck % i == 0)
{
factorCount += 2; // We found a pair of factors.
}
}
// Check if our number is an exact square.
if (sqrt * sqrt == numberToCheck)
{
factorCount++;
}
return factorCount;
}
There are other approaches you could use that are faster but you might find that this is already fast enough for your needs, especially if you only need it to work with 32-bit integers.
Reducing the bound of how high you have to go as you could knowingly stop at the square root of the number, though this does carry the caution of picking out squares that would have the odd number of factors, but it does help reduce how often the loop has to be executed.
Looks like there is a lengthy discussion about this exact topic here: Algorithm to calculate the number of divisors of a given number
Hope this helps
The first thing to notice is that it suffices to find all of the prime factors. Once you have these it's easy to find the number of total divisors: for each prime, add 1 to the number of times it appears and multiply these together. So for 12 = 2 * 2 * 3 you have (2 + 1) * (1 + 1) = 3 * 2 = 6 factors.
The next thing follows from the first: when you find a factor, divide it out so that the resulting number is smaller. When you combine this with the fact that you need only check to the square root of the current number this is a huge improvement. For example, consider N = 10714293844487412. Naively it would take N steps. Checking up to its square root takes sqrt(N) or about 100 million steps. But since the factors 2, 2, 3, and 953 are discovered early on you actually only need to check to one million -- a 100x improvement!
Another improvement: you don't need to check every number to see if it divides your number, just the primes. If it's more convenient you can use 2 and the odd numbers, or 2, 3, and the numbers 6n-1 and 6n+1 (a basic wheel sieve).
Here's another nice improvement. If you can quickly determine whether a number is prime, you can reduce the need for division even further. Suppose, after removing small factors, you have 120528291333090808192969. Even checking up to its square root will take a long time -- 300 billion steps. But a Miller-Rabin test (very fast -- maybe 10 to 20 nanoseconds) will show that this number is composite. How does this help? It means that if you check up to its cube root and find no factors, then there are exactly two primes left. If the number is a square, its factors are prime; if the number is not a square, the numbers are distinct primes. This means you can multiply your 'running total' by 3 or 4, respectively, to get the final answer -- even without knowing the factors! This can make more of a difference than you'd guess: the number of steps needed drops from 300 billion to just 50 million, a 6000-fold improvement!
The only trouble with the above is that Miller-Rabin can only prove that numbers are composite; if it's given a prime it can't prove that the number is prime. In that case you may wish to write a primality-proving function to spare yourself the effort of factoring to the square root of the number. (Alternately, you could just do a few more Miller-Rabin tests, if you would be satisfied with high confidence that your answer is correct rather than a proof that it is. If a number passes 15 tests then it's composite with probability less than 1 in a billion.)
You can limit the upper limit of your FOR loop to numberToCheck / 2
Start your loop counter at 2 (if your number is even) or 3 (for odd values). This should allow you to check every other number dropping your loop count by another 50%.
public int GetHowManyFactors(int numberToCheck)
{
// we know 1 is a factor and the numberToCheck
int factorCount = 2;
int i = 2 + ( numberToCheck % 2 ); //start at 2 (or 3 if numberToCheck is odd)
for( ; i < numberToCheck / 2; i+=2)
{
if (numberToCheck % i == 0)
factorCount++;
}
return factorCount;
}
Well if you are going to use this function a lot you can use modified algorithm of Eratosthenes http://en.wikipedia.org/wiki/Sieve_of_Eratosthenes and store answars for a interval 1 to Max in array. It will run IntializeArray() once and after it will return answers in 0(1).
const int Max =1000000;
int arr [] = new int [Max+1];
public void InitializeArray()
{
for(int i=1;i<=Max;++i)
arr[i]=1;//1 is factor for everyone
for(int i=2;i<=Max;++i)
for(int j=i;i<=Max;i+=j)
++arr[j];
}
public int GetHowManyFactors(int numberToCheck)
{
return arr[numberToCheck];
}
But if you are not going to use this function a lot I think best solution is to check unitll square root.
Note: I have corrected my code!
An easy to implement algorithm that will bring you much farther than trial division is Pollard Rho
Here is a Java implementation, that should be easy to adapt to C#: http://www.cs.princeton.edu/introcs/78crypto/PollardRho.java.html
https://codility.com/demo/results/demoAAW2WH-MGF/
public int solution(int n) {
var counter = 0;
if (n == 1) return 1;
counter = 2; //1 and itself
int sqrtPoint = (Int32)(Math.Truncate(Math.Sqrt(n)));
for (int i = 2; i <= sqrtPoint; i++)
{
if (n % i == 0)
{
counter += 2; // We found a pair of factors.
}
}
// Check if our number is an exact square.
if (sqrtPoint * sqrtPoint == n)
{
counter -=1;
}
return counter;
}
Codility Python 100 %
Here is solution in python with little explanation-
def solution(N):
"""
Problem Statement can be found here-
https://app.codility.com/demo/results/trainingJNNRF6-VG4/
Codility 100%
Idea is count decedent factor in single travers. ie. if 24 is divisible by 4 then it is also divisible by 8
Traverse only up to square root of number ie. in case of 24, 4*4 < 24 but 5*5!<24 so loop through only i*i<N
"""
print(N)
count = 0
i = 1
while i * i <= N:
if N % i == 0:
print()
print("Divisible by " + str(i))
if i * i == N:
count += 1
print("Count increase by one " + str(count))
else:
count += 2
print("Also divisible by " + str(int(N / i)))
print("Count increase by two count " + str(count))
i += 1
return count
Example by run-
if __name__ == '__main__':
# result = solution(24)
# result = solution(35)
result = solution(1)
print("")
print("Solution " + str(result))
"""
Example1-
24
Divisible by 1
Also divisible by 24
Count increase by two count 2
Divisible by 2
Also divisible by 12
Count increase by two count 4
Divisible by 3
Also divisible by 8
Count increase by two count 6
Divisible by 4
Also divisible by 6
Count increase by two count 8
Solution 8
Example2-
35
Divisible by 1
Also divisible by 35
Count increase by two count 2
Divisible by 5
Also divisible by 7
Count increase by two count 4
Solution 4
Example3-
1
Divisible by 1
Count increase by one 1
Solution 1
"""
Github link
I got pretty good results with complexity of O(sqrt(N)).
if (N == 1) return 1;
int divisors = 0;
int max = N;
for (int div = 1; div < max; div++) {
if (N % div == 0) {
divisors++;
if (div != N/div) {
divisors++;
}
}
if (N/div < max) {
max = N/div;
}
}
return divisors;
Python Implementation
Score 100% https://app.codility.com/demo/results/trainingJ78AK2-DZ5/
import math;
def solution(N):
# write your code in Python 3.6
NumberFactor=2; #one and the number itself
if(N==1):
return 1;
if(N==2):
return 2;
squareN=int(math.sqrt(N)) +1;
#print(squareN)
for elem in range (2,squareN):
if(N%elem==0):
NumberFactor+=2;
if( (squareN-1) * (squareN-1) ==N):
NumberFactor-=1;
return NumberFactor