C# - A faster alternative to Convert.ToSingle()

C# - A faster alternative to Convert.ToSingle() - c#

I'm working on a program which reads millions of floating point numbers from a text file. This program runs inside of a game that I'm designing, so I need it to be fast (I'm loading an obj file). So far, loading a relatively small file takes about a minute (without precompilation) because of the slow speed of Convert.ToSingle(). Is there a faster way to do this?
EDIT: Here's the code I use to parse the Obj file
http://pastebin.com/TfgEge9J
using System;
using System.IO;
using System.Collections.Generic;
using OpenTK.Math;
using System.Drawing;
using PlatformLib;
public class ObjMeshLoader
{
public static StreamReader[] LoadMeshes(string fileName)
{
StreamReader mreader = new StreamReader(PlatformLib.Platform.openFile(fileName));
MemoryStream current = null;
List<MemoryStream> mstreams = new List<MemoryStream>();
StreamWriter mwriter = null;
if (!mreader.ReadLine().Contains("#"))
{
mreader.BaseStream.Close();
throw new Exception("Invalid header");
}
while (!mreader.EndOfStream)
{
string cmd = mreader.ReadLine();
string line = cmd;
line = line.Trim(splitCharacters);
line = line.Replace(" ", " ");
string[] parameters = line.Split(splitCharacters);
if (parameters[0] == "mtllib")
{
loadMaterials(parameters[1]);
}
if (parameters[0] == "o")
{
if (mwriter != null)
{
mwriter.Flush();
current.Position = 0;
}
current = new MemoryStream();
mwriter = new StreamWriter(current);
mwriter.WriteLine(parameters[1]);
mstreams.Add(current);
}
else
{
if (mwriter != null)
{
mwriter.WriteLine(cmd);
mwriter.Flush();
}
}
}
mwriter.Flush();
current.Position = 0;
List<StreamReader> readers = new List<StreamReader>();
foreach (MemoryStream e in mstreams)
{
e.Position = 0;
StreamReader sreader = new StreamReader(e);
readers.Add(sreader);
}
return readers.ToArray();
}
public static bool Load(ObjMesh mesh, string fileName)
{
try
{
using (StreamReader streamReader = new StreamReader(Platform.openFile(fileName)))
{
Load(mesh, streamReader);
streamReader.Close();
return true;
}
}
catch { return false; }
}
public static bool Load2(ObjMesh mesh, StreamReader streamReader, ObjMesh prevmesh)
{
if (prevmesh != null)
{
//mesh.Vertices = prevmesh.Vertices;
}
try
{
//streamReader.BaseStream.Position = 0;
Load(mesh, streamReader);
streamReader.Close();
#if DEBUG
Console.WriteLine("Loaded "+mesh.Triangles.Length.ToString()+" triangles and"+mesh.Quads.Length.ToString()+" quadrilaterals parsed, with a grand total of "+mesh.Vertices.Length.ToString()+" vertices.");
#endif
return true;
}
catch (Exception er) { Console.WriteLine(er); return false; }
}
static char[] splitCharacters = new char[] { ' ' };
static List<Vector3> vertices;
static List<Vector3> normals;
static List<Vector2> texCoords;
static Dictionary<ObjMesh.ObjVertex, int> objVerticesIndexDictionary;
static List<ObjMesh.ObjVertex> objVertices;
static List<ObjMesh.ObjTriangle> objTriangles;
static List<ObjMesh.ObjQuad> objQuads;
static Dictionary<string, Bitmap> materials = new Dictionary<string, Bitmap>();
static void loadMaterials(string path)
{
StreamReader mreader = new StreamReader(Platform.openFile(path));
string current = "";
bool isfound = false;
while (!mreader.EndOfStream)
{
string line = mreader.ReadLine();
line = line.Trim(splitCharacters);
line = line.Replace(" ", " ");
string[] parameters = line.Split(splitCharacters);
if (parameters[0] == "newmtl")
{
if (materials.ContainsKey(parameters[1]))
{
isfound = true;
}
else
{
current = parameters[1];
}
}
if (parameters[0] == "map_Kd")
{
if (!isfound)
{
string filename = "";
for (int i = 1; i < parameters.Length; i++)
{
filename += parameters[i];
}
string searcher = "\\" + "\\";
filename.Replace(searcher, "\\");
Bitmap mymap = new Bitmap(filename);
materials.Add(current, mymap);
isfound = false;
}
}
}
}
static float parsefloat(string val)
{
return Convert.ToSingle(val);
}
int remaining = 0;
static string GetLine(string text, ref int pos)
{
string retval = text.Substring(pos, text.IndexOf(Environment.NewLine, pos));
pos = text.IndexOf(Environment.NewLine, pos);
return retval;
}
static void Load(ObjMesh mesh, StreamReader textReader)
{
//try {
//vertices = null;
//objVertices = null;
if (vertices == null)
{
vertices = new List<Vector3>();
}
if (normals == null)
{
normals = new List<Vector3>();
}
if (texCoords == null)
{
texCoords = new List<Vector2>();
}
if (objVerticesIndexDictionary == null)
{
objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>();
}
if (objVertices == null)
{
objVertices = new List<ObjMesh.ObjVertex>();
}
objTriangles = new List<ObjMesh.ObjTriangle>();
objQuads = new List<ObjMesh.ObjQuad>();
mesh.vertexPositionOffset = vertices.Count;
string line;
string alltext = textReader.ReadToEnd();
int pos = 0;
while ((line = GetLine(alltext, pos)) != null)
{
if (line.Length < 2)
{
break;
}
//line = line.Trim(splitCharacters);
//line = line.Replace(" ", " ");
string[] parameters = line.Split(splitCharacters);
switch (parameters[0])
{
case "usemtl":
//Material specification
try
{
mesh.Material = materials[parameters[1]];
}
catch (KeyNotFoundException)
{
Console.WriteLine("WARNING: Texture parse failure: " + parameters[1]);
}
break;
case "p": // Point
break;
case "v": // Vertex
float x = parsefloat(parameters[1]);
float y = parsefloat(parameters[2]);
float z = parsefloat(parameters[3]);
vertices.Add(new Vector3(x, y, z));
break;
case "vt": // TexCoord
float u = parsefloat(parameters[1]);
float v = parsefloat(parameters[2]);
texCoords.Add(new Vector2(u, v));
break;
case "vn": // Normal
float nx = parsefloat(parameters[1]);
float ny = parsefloat(parameters[2]);
float nz = parsefloat(parameters[3]);
normals.Add(new Vector3(nx, ny, nz));
break;
case "f":
switch (parameters.Length)
{
case 4:
ObjMesh.ObjTriangle objTriangle = new ObjMesh.ObjTriangle();
objTriangle.Index0 = ParseFaceParameter(parameters[1]);
objTriangle.Index1 = ParseFaceParameter(parameters[2]);
objTriangle.Index2 = ParseFaceParameter(parameters[3]);
objTriangles.Add(objTriangle);
break;
case 5:
ObjMesh.ObjQuad objQuad = new ObjMesh.ObjQuad();
objQuad.Index0 = ParseFaceParameter(parameters[1]);
objQuad.Index1 = ParseFaceParameter(parameters[2]);
objQuad.Index2 = ParseFaceParameter(parameters[3]);
objQuad.Index3 = ParseFaceParameter(parameters[4]);
objQuads.Add(objQuad);
break;
}
break;
}
}
//}catch(Exception er) {
// Console.WriteLine(er);
// Console.WriteLine("Successfully recovered. Bounds/Collision checking may fail though");
//}
mesh.Vertices = objVertices.ToArray();
mesh.Triangles = objTriangles.ToArray();
mesh.Quads = objQuads.ToArray();
textReader.BaseStream.Close();
}
public static void Clear()
{
objVerticesIndexDictionary = null;
vertices = null;
normals = null;
texCoords = null;
objVertices = null;
objTriangles = null;
objQuads = null;
}
static char[] faceParamaterSplitter = new char[] { '/' };
static int ParseFaceParameter(string faceParameter)
{
Vector3 vertex = new Vector3();
Vector2 texCoord = new Vector2();
Vector3 normal = new Vector3();
string[] parameters = faceParameter.Split(faceParamaterSplitter);
int vertexIndex = Convert.ToInt32(parameters[0]);
if (vertexIndex < 0) vertexIndex = vertices.Count + vertexIndex;
else vertexIndex = vertexIndex - 1;
//Hmm. This seems to be broken.
try
{
vertex = vertices[vertexIndex];
}
catch (Exception)
{
throw new Exception("Vertex recognition failure at " + vertexIndex.ToString());
}
if (parameters.Length > 1)
{
int texCoordIndex = Convert.ToInt32(parameters[1]);
if (texCoordIndex < 0) texCoordIndex = texCoords.Count + texCoordIndex;
else texCoordIndex = texCoordIndex - 1;
try
{
texCoord = texCoords[texCoordIndex];
}
catch (Exception)
{
Console.WriteLine("ERR: Vertex " + vertexIndex + " not found. ");
throw new DllNotFoundException(vertexIndex.ToString());
}
}
if (parameters.Length > 2)
{
int normalIndex = Convert.ToInt32(parameters[2]);
if (normalIndex < 0) normalIndex = normals.Count + normalIndex;
else normalIndex = normalIndex - 1;
normal = normals[normalIndex];
}
return FindOrAddObjVertex(ref vertex, ref texCoord, ref normal);
}
static int FindOrAddObjVertex(ref Vector3 vertex, ref Vector2 texCoord, ref Vector3 normal)
{
ObjMesh.ObjVertex newObjVertex = new ObjMesh.ObjVertex();
newObjVertex.Vertex = vertex;
newObjVertex.TexCoord = texCoord;
newObjVertex.Normal = normal;
int index;
if (objVerticesIndexDictionary.TryGetValue(newObjVertex, out index))
{
return index;
}
else
{
objVertices.Add(newObjVertex);
objVerticesIndexDictionary[newObjVertex] = objVertices.Count - 1;
return objVertices.Count - 1;
}
}
}

Based on your description and the code you've posted, I'm going to bet that your problem isn't with the reading, the parsing, or the way you're adding things to your collections. The most likely problem is that your ObjMesh.Objvertex structure doesn't override GetHashCode. (I'm assuming that you're using code similar to http://www.opentk.com/files/ObjMesh.cs.
If you're not overriding GetHashCode, then your objVerticesIndexDictionary is going to perform very much like a linear list. That would account for the performance problem that you're experiencing.
I suggest that you look into providing a good GetHashCode method for your ObjMesh.Objvertex class.
See Why is ValueType.GetHashCode() implemented like it is? for information about the default GetHashCode implementation for value types and why it's not suitable for use in a hash table or dictionary.

Edit 3: The problem is NOT with the parsing.
It's with how you read the file. If you read it properly, it would be faster; however, it seems like your reading is unusually slow. My original suspicion was that it was because of excess allocations, but it seems like there might be other problems with your code too, since that doesn't explain the entire slowdown.
Nevertheless, here's a piece of code I made that completely avoids all object allocations:
static void Main(string[] args)
{
long counter = 0;
var sw = Stopwatch.StartNew();
var sb = new StringBuilder();
var text = File.ReadAllText("spacestation.obj");
for (int i = 0; i < text.Length; i++)
{
int start = i;
while (i < text.Length &&
(char.IsDigit(text[i]) || text[i] == '-' || text[i] == '.'))
{ i++; }
if (i > start)
{
sb.Append(text, start, i - start); //Copy data to the buffer
float value = Parse(sb); //Parse the data
sb.Remove(0, sb.Length); //Clear the buffer
counter++;
}
}
sw.Stop();
Console.WriteLine("{0:N0}", sw.Elapsed.TotalSeconds); //Only a few ms
}
with this parser:
const int MIN_POW_10 = -16, int MAX_POW_10 = 16,
NUM_POWS_10 = MAX_POW_10 - MIN_POW_10 + 1;
static readonly float[] pow10 = GenerateLookupTable();
static float[] GenerateLookupTable()
{
var result = new float[(-MIN_POW_10 + MAX_POW_10) * 10];
for (int i = 0; i < result.Length; i++)
result[i] = (float)((i / NUM_POWS_10) *
Math.Pow(10, i % NUM_POWS_10 + MIN_POW_10));
return result;
}
static float Parse(StringBuilder str)
{
float result = 0;
bool negate = false;
int len = str.Length;
int decimalIndex = str.Length;
for (int i = len - 1; i >= 0; i--)
if (str[i] == '.')
{ decimalIndex = i; break; }
int offset = -MIN_POW_10 + decimalIndex;
for (int i = 0; i < decimalIndex; i++)
if (i != decimalIndex && str[i] != '-')
result += pow10[(str[i] - '0') * NUM_POWS_10 + offset - i - 1];
else if (str[i] == '-')
negate = true;
for (int i = decimalIndex + 1; i < len; i++)
if (i != decimalIndex)
result += pow10[(str[i] - '0') * NUM_POWS_10 + offset - i];
if (negate)
result = -result;
return result;
}
it happens in a small fraction of a second.
Of course, this parser is poorly tested and has these current restrictions (and more):
Don't try parsing more digits (decimal and whole) than provided for in the array.
No error handling whatsoever.
Only parses decimals, not exponents! i.e. it can parse 1234.56 but not 1.23456E3.
Doesn't care about globalization/localization. Your file is only in a single format, so there's no point caring about that kind of stuff because you're probably using English to store it anyway.
It seems like you won't necessarily need this much overkill, but take a look at your code and try to figure out the bottleneck. It seems to be neither the reading nor the parsing.

Have you measured that the speed problem is really caused by Convert.ToSingle?
In the code you included, I see you create lists and dictionaries like this:
normals = new List<Vector3>();
texCoords = new List<Vector2>();
objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>();
And then when you read the file, you add in the collection one item at a time.
One of the possible optimizations would be to save total number of normals, texCoords, indexes and everything at the start of the file, and then initialize these collections by these numbers. This will pre-allocate the buffers used by collections, so adding items to the them will be pretty fast.
So the collection creation should look like this:
// These values should be stored at the beginning of the file
int totalNormals = Convert.ToInt32(textReader.ReadLine());
int totalTexCoords = Convert.ToInt32(textReader.ReadLine());
int totalIndexes = Convert.ToInt32(textReader.ReadLine());
normals = new List<Vector3>(totalNormals);
texCoords = new List<Vector2>(totalTexCoords);
objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>(totalIndexes);
See List<T> Constructor (Int32) and Dictionary<TKey, TValue> Constructor (Int32).

This related question is for C++, but is definitely worth a read.
For reading as fast as possible, you're probably going to want to map the file into memory and then parse using some custom floating point parser, especially if you know the numbers are always in a specific format (i.e. you're the one generating the input files in the first place).

I tested .Net string parsing once and the fastest function to parse text was the old VB Val() function. You could pull the relevant parts out of Microsoft.VisualBasic.Conversion Val(string)
Converting String to numbers
Comparison of relative test times (ms / 100000 conversions)
Double Single Integer Int(w/ decimal point)
14 13 6 16 Val(Str)
14 14 6 16 Cxx(Val(Str)) e.g., CSng(Val(str))
22 21 17 e! Convert.To(str)
23 21 16 e! XX.Parse(str) e.g. Single.Parse()
30 31 31 32 Cxx(str)
Val: fastest, part of VisualBasic dll, skips non-numeric,
ConvertTo and Parse: slower, part of core, exception on bad format (including decimal point)
Cxx: slowest (for strings), part of core, consistent times across formats

Related

Array.Contains() is always false

I've been smashing my head against this problem for days and have tried a tons of different things. I've been all over the forums, tried everything I've seen with no luck. My issue could be that I don't have an override, but I can't figure out how to get that to work.
I want to check if an array of 5,000+ elements contains a user-entered word. The word gets entered character by character and combined into a string(guessString). And then I use .Contains() to see if that word is in an array.
***EDIT please see screenshots for debug logs WordArray Elements -- Debug Output -- Debug With whitespace detection -- Code that doesnt work
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using TMPro;
using UnityEngine.EventSystems;
using UnityEngine.UI;
using UnityEngine.SceneManagement;
using System.Linq;
public class WordAction : MonoBehaviour
{
TMP_Text m_textComponent;
TMP_Text m_currentSquare;
public TMP_Text[] squareArray;
List<string> dupKey = new List<string>();
public string[] WordArray;
public List<string> DictionaryList = new List<string>();
public TextAsset file;
[SerializeField]
Color correctColor, wrongColor, maybeColor;
[SerializeField]
float colorFadeSpeed, colorFadeTime; // 2f, 1f
public float waitTime;
public string levelID;
public string key;
public AudioSource revealSFX;
bool guess;
string guessString;
int guessRegulator;
int guessCount = 1;
int lessGuessCount; // Starts variable at value of first current row element
int maxGuessCount;
string[] guessStringArray = new string[1];
void Start()
{
for (int i = 0; i < 5; i++) // Duplicate key
{
dupKey.Add(key[i].ToString());
}
var content = file.text;
string[] AllWords = content.Split('\n');
WordArray = AllWords;
}
public void Magic()
{
StartCoroutine(CompareKey());
}
IEnumerator CompareKey()
{
guessRegulator++;
GuessRegulatorFunction();
lessGuessCount = (guessCount * 5) - 5; // Starts variable at value of first current row element
maxGuessCount = guessCount * 5;
guessCount++; // Moves to next row
int k = 0; // Indexer for key[]
int cW = 0; // Indexer for CombineWord()
GameObject keyGO; // Keyboard GO
for (int i = lessGuessCount; i < maxGuessCount; i++)
{
if (cW < 1)
{
CombineWord(i);
cW++;
}
bool match = WordArray.Contains(guessString); // not working
Debug.Log(match);
if (match)
{
//do stuff
}
//compare stuff
string guessStr = squareArray[i].text.ToString();
string keyStr = key[k].ToString();
bool result = guessStr == keyStr;
if (!result && !dupKey.Contains(guessStr))
{
//wrong stuff
GameObject parentGO = squareArray[i].transform.parent.gameObject; // Gets parent of SquareArray element
Image parentImage = parentGO.GetComponent<Image>(); // Gets Image component of parent game object
keyGO = GameObject.Find(squareArray[i].text); // Keyboard
Image keyParentImage = keyGO.GetComponent<Image>(); // Keyboard
wrongColor.a = 255;
keyParentImage.color = wrongColor;
parentImage.color = wrongColor;
yield return null;
}
if (result)
{
//correct stuff
dupKey[k] = "";
GameObject parentGO = squareArray[i].transform.parent.gameObject; // Gets parent of SquareArray element
Image parentImage = parentGO.GetComponent<Image>(); // Gets Image component of parent game object
keyGO = GameObject.Find(squareArray[i].text); // Keyboard
Image keyParentImage = keyGO.GetComponent<Image>(); // Keyboard
correctColor.a = 255;
keyParentImage.color = correctColor;
parentImage.color = correctColor;
yield return null;
}
if (!result && dupKey.Contains(guessStr))
{
//yellow stuff
for (int x = 0; x < 5; x++) // Duplicate key
{
if (guessStr == dupKey[x])
{
dupKey[x] = "";
}
}
GameObject parentGO = squareArray[i].transform.parent.gameObject; // Gets parent of SquareArray element
Image parentImage = parentGO.GetComponent<Image>(); // Gets Image component of parent game object
keyGO = GameObject.Find(squareArray[i].text); // Keyboard
Image keyParentImage = keyGO.GetComponent<Image>(); // Keyboard
maybeColor.a = 255;
keyParentImage.color = maybeColor;
parentImage.color = maybeColor;
yield return null;
}
revealSFX.Play();
k++;
yield return new WaitForSeconds(waitTime);
}
dupKey.Clear();
for (int i = 0; i < 5; i++) // Duplicate key
{
dupKey.Add(key[i].ToString());
}
}
void GuessRegulatorFunction()
{
guessRegulator++; // Stops multiple guess attempts
for (int i = 0; i < (guessCount * 5); i++) // Checks if row is blank when guessing
{
if (squareArray[i].text == "")
{
guess = false;
guessRegulator = 0; // Resets guess regulator
break;
}
else
{
guess = true;
}
}
if (guessRegulator > 1 || guess == false) // Stops multiple guess attempts
{
return;
}
}
public void BackSpace()
{
for (int i = guessCount * 5; i > (guessCount * 5) - 6; i--)
{
if (squareArray[i].text != "")
{
squareArray[i].text = "";
break;
}
}
}
public void InputLetter()
{
guessRegulator = 0;
for (int i = 0; i < guessCount * 5; i++)
{
if (squareArray[i].text == "")
{
squareArray[i].text = EventSystem.current.currentSelectedGameObject.name.ToString();
break;
}
}
}
void CombineWord(int i)
{
var string1 = squareArray[i].text.ToString();
var string2 = squareArray[i + 1].text.ToString();
var string3 = squareArray[i + 2].text.ToString();
var string4 = squareArray[i + 3].text.ToString();
var string5 = squareArray[i + 4].text.ToString();
guessString = string1 + string2 + string3 + string4 + string5;
//Debug.Log(guessString);
}
}

I've taken your line of code that isn't working and copied it verbatim. I've then taken the data that you say is in the WordArray and guessString variables and set those up. Then I ran this:
var WordArray = new [] { "WHICH", "THERE", "THEIR", "ABOUT" };
var guessString= "THERE";
bool match = WordArray.Contains(guessString);
Console.WriteLine(match);
match comes out True.
Your variables do not contain the data you think they do.
It's likely that the content that you call .Split('\n') on actually contains Windows end of line markers, so a combination of "\r\n". Since you only split on '\n' it's likely that the '\r' remains in your strings and hence "THERE" does not match "THERE\r".
Try this instead:
.Split(new[] { Environment.NewLine }, StringSplitOptions.None);
If your code is run on either Windows or on Linux the above line works. Just watch out for files that mix the endings.

Well, your WordArray is always empty. You put your file content into a local variable called AllWords.
Your word will never be found in an empty array.

Importing and removing duplicates from a massive amount of text files using C# and Redis

This is a bit of a doozy and it's been a while since I worked with C#, so bear with me:
I'm running a jruby script to iterate through 900 files (5 Mb - 1500 Mb in size) to figure out how many dupes STILL exist within these (already uniq'd) files. I had little luck with awk.
My latest idea was to insert them into a local MongoDB instance like so:
db.collection('hashes').update({ :_id => hash}, { $inc: { count: 1} }, { upsert: true)
... so that later I could just query it like db.collection.where({ count: { $gt: 1 } }) to get all the dupes.
This is working great except it's been over 24 hours and at the time of writing I'm at 72,532,927 Mongo entries and growing.
I think Ruby's .each_line is bottlnecking the IO hardcore:
So what I'm thinking now is compiling a C# program which fires up a thread PER EACH FILE and inserts the line (md5 hash) into a Redis list.
From there, I could have another compiled C# program simply pop the values off and ignore the save if the count is 1.
So the questions are:
Will using a compiled file reader and multithreading the file reads significantly improve performance?
Is using Redis even necessary? With a tremendous amount of AWS memory, could I not just use the threads to fill some sort of a list atomically and proceed from there?
Thanks in advance.

Updated
New solution. Old solution. The main idea is to calculate dummy hashes(just sum of all chars in string) of each line and store it in Dictionary<ulong, List<LinePosition>> _hash2LinePositions. It's possible to have multiple hashes in the same stream and it solves by List in Dictionary Value. When the hashes are the same, we read and compare the strings from the streams. LinePosition is using for storing info about line - position in stream and its length. I don't have such huge files as you, but my tests shows that it works. Here is the full code:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
public class Solution
{
struct LinePosition
{
public long Start;
public long Length;
public LinePosition(long start, long count)
{
Start = start;
Length = count;
}
public override string ToString()
{
return string.Format("Start: {0}, Length: {1}", Start, Length);
}
}
class TextFileHasher : IDisposable
{
readonly Dictionary<ulong, List<LinePosition>> _hash2LinePositions;
readonly Stream _stream;
bool _isDisposed;
public HashSet<ulong> Hashes { get; private set; }
public string Name { get; private set; }
public TextFileHasher(string name, Stream stream)
{
Name = name;
_stream = stream;
_hash2LinePositions = new Dictionary<ulong, List<LinePosition>>();
Hashes = new HashSet<ulong>();
}
public override string ToString()
{
return Name;
}
public void CalculateFileHash()
{
int readByte = -1;
ulong dummyLineHash = 0;
// Line start position in file
long startPosition = 0;
while ((readByte = _stream.ReadByte()) != -1) {
// Read until new line
if (readByte == '\r' || readByte == '\n') {
// If there was data
if (dummyLineHash != 0) {
// Add line hash and line position to the dict
AddToDictAndHash(dummyLineHash, startPosition, _stream.Position - 1 - startPosition);
// Reset line hash
dummyLineHash = 0;
}
}
else {
// Was it new line ?
if (dummyLineHash == 0)
startPosition = _stream.Position - 1;
// Calculate dummy hash
dummyLineHash += (uint)readByte;
}
}
if (dummyLineHash != 0) {
// Add line hash and line position to the dict
AddToDictAndHash(dummyLineHash, startPosition, _stream.Position - startPosition);
// Reset line hash
dummyLineHash = 0;
}
}
public List<LinePosition> GetLinePositions(ulong hash)
{
return _hash2LinePositions[hash];
}
public List<string> GetDuplicates()
{
List<string> duplicates = new List<string>();
foreach (var key in _hash2LinePositions.Keys) {
List<LinePosition> linesPos = _hash2LinePositions[key];
if (linesPos.Count > 1) {
duplicates.AddRange(FindExactDuplicates(linesPos));
}
}
return duplicates;
}
public void Dispose()
{
if (_isDisposed)
return;
_stream.Dispose();
_isDisposed = true;
}
private void AddToDictAndHash(ulong hash, long start, long count)
{
List<LinePosition> linesPosition;
if (!_hash2LinePositions.TryGetValue(hash, out linesPosition)) {
linesPosition = new List<LinePosition>() { new LinePosition(start, count) };
_hash2LinePositions.Add(hash, linesPosition);
}
else {
linesPosition.Add(new LinePosition(start, count));
}
Hashes.Add(hash);
}
public byte[] GetLineAsByteArray(LinePosition prevPos)
{
long len = prevPos.Length;
byte[] lineBytes = new byte[len];
_stream.Seek(prevPos.Start, SeekOrigin.Begin);
_stream.Read(lineBytes, 0, (int)len);
return lineBytes;
}
private List<string> FindExactDuplicates(List<LinePosition> linesPos)
{
List<string> duplicates = new List<string>();
linesPos.Sort((x, y) => x.Length.CompareTo(y.Length));
LinePosition prevPos = linesPos[0];
for (int i = 1; i < linesPos.Count; i++) {
if (prevPos.Length == linesPos[i].Length) {
var prevLineArray = GetLineAsByteArray(prevPos);
var thisLineArray = GetLineAsByteArray(linesPos[i]);
if (prevLineArray.SequenceEqual(thisLineArray)) {
var line = System.Text.Encoding.Default.GetString(prevLineArray);
duplicates.Add(line);
}
#if false
string prevLine = System.Text.Encoding.Default.GetString(prevLineArray);
string thisLine = System.Text.Encoding.Default.GetString(thisLineArray);
Console.WriteLine("PrevLine: {0}\r\nThisLine: {1}", prevLine, thisLine);
StringBuilder sb = new StringBuilder();
sb.Append(prevPos);
sb.Append(" is '");
sb.Append(prevLine);
sb.Append("'. ");
sb.AppendLine();
sb.Append(linesPos[i]);
sb.Append(" is '");
sb.Append(thisLine);
sb.AppendLine("'. ");
sb.Append("Equals => ");
sb.Append(prevLine.CompareTo(thisLine) == 0);
Console.WriteLine(sb.ToString());
#endif
}
else {
prevPos = linesPos[i];
}
}
return duplicates;
}
}
public static void Main(String[] args)
{
List<TextFileHasher> textFileHashers = new List<TextFileHasher>();
string text1 = "abc\r\ncba\r\nabc";
TextFileHasher tfh1 = new TextFileHasher("Text1", new MemoryStream(System.Text.Encoding.Default.GetBytes(text1)));
tfh1.CalculateFileHash();
textFileHashers.Add(tfh1);
string text2 = "def\r\ncba\r\nwet";
TextFileHasher tfh2 = new TextFileHasher("Text2", new MemoryStream(System.Text.Encoding.Default.GetBytes(text2)));
tfh2.CalculateFileHash();
textFileHashers.Add(tfh2);
string text3 = "def\r\nbla\r\nwat";
TextFileHasher tfh3 = new TextFileHasher("Text3", new MemoryStream(System.Text.Encoding.Default.GetBytes(text3)));
tfh3.CalculateFileHash();
textFileHashers.Add(tfh3);
List<string> totalDuplicates = new List<string>();
Dictionary<ulong, Dictionary<TextFileHasher, List<LinePosition>>> totalHashes = new Dictionary<ulong, Dictionary<TextFileHasher, List<LinePosition>>>();
textFileHashers.ForEach(tfh => {
foreach(var dummyHash in tfh.Hashes) {
Dictionary<TextFileHasher, List<LinePosition>> tfh2LinePositions = null;
if (!totalHashes.TryGetValue(dummyHash, out tfh2LinePositions))
totalHashes[dummyHash] = new Dictionary<TextFileHasher, List<LinePosition>>() { { tfh, tfh.GetLinePositions(dummyHash) } };
else {
List<LinePosition> linePositions = null;
if (!tfh2LinePositions.TryGetValue(tfh, out linePositions))
tfh2LinePositions[tfh] = tfh.GetLinePositions(dummyHash);
else
linePositions.AddRange(tfh.GetLinePositions(dummyHash));
}
}
});
HashSet<TextFileHasher> alreadyGotDuplicates = new HashSet<TextFileHasher>();
foreach(var hash in totalHashes.Keys) {
var tfh2LinePositions = totalHashes[hash];
var tfh = tfh2LinePositions.Keys.FirstOrDefault();
// Get duplicates in the TextFileHasher itself
if (tfh != null && !alreadyGotDuplicates.Contains(tfh)) {
totalDuplicates.AddRange(tfh.GetDuplicates());
alreadyGotDuplicates.Add(tfh);
}
if (tfh2LinePositions.Count <= 1) {
continue;
}
// Algo to get duplicates in more than 1 TextFileHashers
var tfhs = tfh2LinePositions.Keys.ToArray();
for (int i = 0; i < tfhs.Length; i++) {
var tfh1Positions = tfhs[i].GetLinePositions(hash);
for (int j = i + 1; j < tfhs.Length; j++) {
var tfh2Positions = tfhs[j].GetLinePositions(hash);
for (int k = 0; k < tfh1Positions.Count; k++) {
var tfh1Pos = tfh1Positions[k];
var tfh1ByteArray = tfhs[i].GetLineAsByteArray(tfh1Pos);
for (int m = 0; m < tfh2Positions.Count; m++) {
var tfh2Pos = tfh2Positions[m];
if (tfh1Pos.Length != tfh2Pos.Length)
continue;
var tfh2ByteArray = tfhs[j].GetLineAsByteArray(tfh2Pos);
if (tfh1ByteArray.SequenceEqual(tfh2ByteArray)) {
var line = System.Text.Encoding.Default.GetString(tfh1ByteArray);
totalDuplicates.Add(line);
}
}
}
}
}
}
Console.WriteLine();
if (totalDuplicates.Count > 0) {
Console.WriteLine("Total number of duplicates: {0}", totalDuplicates.Count);
Console.WriteLine("#######################");
totalDuplicates.ForEach(x => Console.WriteLine("{0}", x));
Console.WriteLine("#######################");
}
// Free resources
foreach (var tfh in textFileHashers)
tfh.Dispose();
}
}

If you have tons of ram... You guys are overthinking it...
var fileLines = File.ReadAllLines(#"c:\file.csv").Distinct();

Parsing mesh with more than 65k-vertices in Unity at runtime

I'm trying to load OBJ models in runtime using Unity and C#. I'm using Unity's Wiki parser "FastOBJImporter" http://wiki.unity3d.com/index.php/FastObjImporter for parsing the OBJ files.
I can't load a mesh with more than 65,534 vertices since it's a Unity limitation ( http://answers.unity3d.com/questions/471639/mesh-with-more-than-65000-vertices.html )
My idea is passing a big mesh path to FastOBJImporter and generate multiple GameObjects with less than 65k vertices in order to load bigger models.
Does anybody know how can I modify FastOBJimporter safely in order to return a list of sub-meshes instead of a big mesh? Any other solutions/ideas are welcomed.

This script does what you want but with STL model files. It imports the model, no matter how large, by splitting the mesh into submeshes each time 65k verts is reached. STL files can be converted to OBJ files so, I imagine, using a simple converter, or altering the script, could do the trick.
Code below (I take no credit for code).
#pragma warning disable 0219
using UnityEngine;
using System.Text;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System;
using UnityEngine.UI;
using UnityEngine.EventSystems;
using System.Threading;
namespace Parabox.STL
{
/*Import methods for STL files*/
public class STL_ImportScript : MonoBehaviour
{
const int MAX_FACETS_PER_MESH = 65535 / 3;
class Facet
{
public Vector3 normal;
public Vector3 a, b, c;
public override string ToString()
{
return string.Format("{0:F2}: {1:F2}, {2:F2}, {3:F2}", normal, a, b, c);
}
}
/**
* Import an STL file at path.
*/
public static Mesh[] Import(string path)
{
try
{
return ImportBinary(path);
}
catch (System.Exception e)
{
UnityEngine.Debug.LogWarning(string.Format("Failed importing mesh at path {0}.\n{1}", path, e.ToString()));
return null;
}
}
private static Mesh[] ImportBinary(string path)
{
List<Facet> facets = new List<Facet>();
byte[] header;
uint facetCount;
using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
{
using (BinaryReader br = new BinaryReader(fs, new ASCIIEncoding()))
{
while (br.BaseStream.Position < br.BaseStream.Length)
{
// read header
header = br.ReadBytes(80);
facetCount = br.ReadUInt32();
for (uint i = 0; i < facetCount; i++)
{
try
{
Facet facet = new Facet();
facet.normal.x = br.ReadSingle();
facet.normal.y = br.ReadSingle();
facet.normal.z = br.ReadSingle();
facet.a.x = br.ReadSingle();
facet.a.y = br.ReadSingle();
facet.a.z = br.ReadSingle();
facet.b.x = br.ReadSingle();
facet.b.y = br.ReadSingle();
facet.b.z = br.ReadSingle();
facet.c.x = br.ReadSingle();
facet.c.y = br.ReadSingle();
facet.c.z = br.ReadSingle();
facets.Add(facet);
// padding
br.ReadUInt16();
}
catch (Exception e)
{
//Console.WriteLine(e.Message);
Debug.Log(e.Message);
}
}
}
}
}
return CreateMeshWithFacets(facets);
}
const int SOLID = 1;
const int FACET = 2;
const int OUTER = 3;
const int VERTEX = 4;
const int ENDLOOP = 5;
const int ENDFACET = 6;
const int ENDSOLID = 7;
const int EMPTY = 0;
private static int ReadState(string line)
{
if (line.StartsWith("solid"))
return SOLID;
else if (line.StartsWith("facet"))
return FACET;
else if (line.StartsWith("outer"))
return OUTER;
else if (line.StartsWith("vertex"))
return VERTEX;
else if (line.StartsWith("endloop"))
return ENDLOOP;
else if (line.StartsWith("endfacet"))
return ENDFACET;
else if (line.StartsWith("endsolid"))
return ENDSOLID;
else
return EMPTY;
}
private static Vector3 StringToVec3(string str)
{
string[] split = str.Trim().Split(null);
Vector3 v = new Vector3();
float.TryParse(split[0], out v.x);
float.TryParse(split[1], out v.y);
float.TryParse(split[2], out v.z);
return v;
}
private static Mesh[] CreateMeshWithFacets(IList<Facet> facets)
{
int fl = facets.Count, f = 0, mvc = MAX_FACETS_PER_MESH * 3;
Mesh[] meshes = new Mesh[fl / MAX_FACETS_PER_MESH + 1];
for (int i = 0; i < meshes.Length; i++)
{
int len = System.Math.Min(mvc, (fl - f) * 3);
Vector3[] v = new Vector3[len];
Vector3[] n = new Vector3[len];
int[] t = new int[len];
for (int it = 0; it < len; it += 3)
{
v[it] = facets[f].a;
v[it + 1] = facets[f].b;
v[it + 2] = facets[f].c;
n[it] = facets[f].normal;
n[it + 1] = facets[f].normal;
n[it + 2] = facets[f].normal;
t[it] = it;
t[it + 1] = it + 1;
t[it + 2] = it + 2;
f++;
}
meshes[i] = new Mesh();
meshes[i].vertices = v;
meshes[i].normals = n;
meshes[i].triangles = t;
}
return meshes;
}
}
}
Another solution could be to dynamically combine vertices that share the same space. When you bring in your models at run time you combine vertices that are within a certain threshold of each other to reduce total vertex count below the 65k limit.
Alternatively, use tools, such as Mesh Simplify to lower the level of detail, allowing you to optimize performance and reduce polygon counts below 65k to import your meshes without surpassing the 65k limit. There are different, and more affordable options, but this seemed to be one of the better ones available.

The current top answer here is nicely detailed, but the whole problem can be solved with 1 line of code:
myMeshFilter.mesh = new Mesh() { indexFormat = UnityEngine.Rendering.IndexFormat.UInt32 }; // To support meshes > 65k verts
Correctly setting the index format for the mesh will allow for a bigger list of verts.

Generate multiple unique strings in C#

For my project, I have to generate a list of unique strings.
Everything works fine, but my problem is that it is very slow at the end.
I've tried using Parallel-Loops, but I found out that my ConcurrentBag<T>, which I was using, is also slow.
Now I'm using a simple for-loop and List<T> and it is now a little bit faster, but also really slow.
Here's my code:
private List<string> Generate(int start, int end, bool allowDupes)
{
var list = new List<string>();
var generator = new StringGenerator(LowerCase, UpperCase, Digits, NumberOfCharacters);
for (var i = start; i < end; i++)
{
StringBuilder sb;
while (true)
{
sb = new StringBuilder();
for (var j = 0; j < NumberOfSegments; j++)
{
sb.Append(generator.GenerateRandomString());
if (j < NumberOfSegments - 1)
{
sb.Append(Delimiter);
}
}
if (!allowDupes)
{
if (list.Contains(sb.ToString()))
{
continue;
}
}
break;
}
list.Add(sb.ToString());
GeneratedStringCount = i + 1;
}
return new List<string>(list);
}
I've also talked to my teacher and he would use the same algorithm for generating these strings.
Do you know a better solution? (The GenerateRandomString() Method in StringGenerator is simple and does not consume much performance. list.Contains(xy) is consuming alot of resources. [Performance Analysis in Visual Studio])

List.Contains is slow. Use a HashSet instead.
private List<string> Generate(int start, int end, bool allowDupes)
{
var strings = new HashSet<string>();
var list = new List<string>();
var generator = new StringGenerator(LowerCase, UpperCase, Digits, NumberOfCharacters);
for (var i = start; i < end; i++)
{
while (true)
{
string randomString = GetRandomString();
if (allowDupes || strings.Add(randomString))
{
list.Add(randomString);
break;
}
}
GeneratedStringCount = i + 1;
}
return new List<string>(list);
}
private string GetRandomString()
{
var segments = Enumerable.Range(1, NumberOfSegments)
.Select(_ => generator.GenerateRandomString());
var result = string.Join(Delimeter, segments);
return result;
}
This still has the chance for slow performance, but you could remedy that with a smart GenerateRandomString function.

public static String GenerateEightCode( int codeLenght, Boolean isCaseSensitive)
{
char[] chars = GetCharsForCode(isCaseSensitive);
byte[] data = new byte[1];
RNGCryptoServiceProvider crypto = new RNGCryptoServiceProvider();
crypto.GetNonZeroBytes(data);
data = new byte[codeLenght];
crypto.GetNonZeroBytes(data);
StringBuilder sb = new StringBuilder(codeLenght);
foreach (byte b in data)
{
sb.Append(chars[b % (chars.Length)]);
}
string key = sb.ToString();
if (codeLenght == 8)
key = key.Substring(0, 4) + "-" + key.Substring(4, 4);
else if (codeLenght == 16)
key = key.Substring(0, 4) + "-" + key.Substring(4, 4) + "-" + key.Substring(8, 4) + "-" + key.Substring(12, 4);
return key.ToString();
}
private static char[] GetCharsForCode(Boolean isCaseSensitive)
{
// all - abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890
char[] chars = new char[58];
if (isCaseSensitive)
{
chars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ123456789".ToCharArray();//počet unikátních kombinací 4 - 424 270, 8 - 1 916 797 311, 16 - 7.99601828013E+13
}
else
{
chars = new char[35];
chars = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789".ToCharArray();//počet unikátních kombinací 4 - 52 360, 8 - 23 535 820, 16 - 4 059 928 950
}
return chars;
}

getting the best record from a file

I have a file with the following text inside
mimi,m,70
tata,f,60
bobo,m,100
soso,f,30
I did the reading from file thing and many many other methods and functions, but how I can get the best male name and his grade according to the grade.
here is the code I wrote. Hope it's not so long
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
namespace practice_Ex
{
class Program
{
public static int[] ReadFile(string FileName, out string[] Name, out char[] Gender)
{
Name = new string[1];
int[] Mark = new int[1];
Gender = new char[1];
if (File.Exists(FileName))
{
FileStream Input = new FileStream(FileName, FileMode.Open, FileAccess.Read);
StreamReader SR = new StreamReader(Input);
string[] Current;
int Counter = 0;
string Str = SR.ReadLine();
while (Str != null)
{
Current = Str.Split(',');
Name[Counter] = Current[0];
Mark[Counter] = int.Parse(Current[2]);
Gender[Counter] = char.Parse(Current[1].ToUpper());
Counter++;
Array.Resize(ref Name, Counter + 1);
Array.Resize(ref Mark, Counter + 1);
Array.Resize(ref Gender, Counter + 1);
Str = SR.ReadLine();
}
}
return Mark;
}
public static int MostFreq(int[] M, out int Frequency)
{
int Counter = 0;
int Frequent = 0;
Frequency = 0;
for (int i = 0; i < M.Length; i++)
{
Counter = 0;
for (int j = 0; j < M.Length; j++)
if (M[i] == M[j])
Counter++;
if (Counter > Frequency)
{
Frequency = Counter;
Frequent = M[i];
}
}
return Frequent;
}
public static int Avg(int[] M)
{
int total = 0;
for (int i = 0; i < M.Length; i++)
total += M[i];
return total / M.Length;
}
public static int AvgCond(char[] G, int[] M, char S)
{
int total = 0;
int counter = 0;
for (int i = 0; i < G.Length; i++)
if (G[i] == S)
{
total += M[i];
counter++;
}
return total / counter;
}
public static int BelowAvg(int[] M, out int AboveAvg)
{
int Bcounter = 0;
AboveAvg = 0;
for (int i = 0; i < M.Length; i++)
{
if (M[i] < Avg(M))
Bcounter++;
else
AboveAvg++;
}
return Bcounter;
}
public static int CheckNames(string[] Name, char C)
{
C = char.Parse(C.ToString().ToLower());
int counter = 0;
string Str;
for (int i = 0; i < Name.Length - 1; i++)
{
Str = Name[i].ToLower();
if (Str[0] == C || Str[Str.Length - 1] == C)
counter++;
}
return counter;
}
public static void WriteFile(string FileName, string[] Output)
{
FileStream FS = new FileStream(FileName, FileMode.OpenOrCreate, FileAccess.Write);
StreamWriter SW = new StreamWriter(FS);
for (int i = 0; i < Output.Length; i++)
SW.WriteLine(Output[i]);
}
static void Main(string[] args)
{
int[] Mark;
char[] Gender;
string[] Name;
string[] Output = new string[8];
int Frequent, Frequency, AvgAll, MaleAvg, FemaleAvg, BelowAverage, AboveAverage, NamesCheck;
Mark = ReadFile("c:\\IUST1.txt", out Name, out Gender);
Frequent = MostFreq(Mark, out Frequency);
AvgAll = Avg(Mark);
MaleAvg = AvgCond(Gender, Mark, 'M');
FemaleAvg = AvgCond(Gender, Mark, 'F');
BelowAverage = BelowAvg(Mark, out AboveAverage);
NamesCheck = CheckNames(Name, 'T');
Output [0]= "Frequent Mark = " + Frequent.ToString();
Output [1]= "Frequency = " + Frequency.ToString();
Output [2]= "Average Of All = " + AvgAll.ToString();
Output [3]= "Average Of Males = " + MaleAvg.ToString();
Output [4]= "Average Of Females = " + FemaleAvg.ToString();
Output [5]= "Below Average = " + BelowAverage.ToString();
Output [6]= "Above Average = " + AboveAverage.ToString();
Output [7]= "Names With \"T\" = " + NamesCheck.ToString();
WriteFile("c:\\Output.txt", Output);
}
}
}

Well, I like LINQ (update: excluded via comments) for querying, especially if I can do it without buffering the data (so I can process a huge file efficiently). For example below (update: removed LINQ); note the use of iterator blocks (yield return) makes this fully "lazy" - only one record is held in memory at a time.
This also shows separation of concerns: one method deals with reading a file line by line; one method deals with parsing a line into a typed data record; one (or more) method(s) work with those data record(s).
using System;
using System.Collections.Generic;
using System.IO;
enum Gender { Male, Female, Unknown }
class Record
{
public string Name { get; set; }
public Gender Gender { get; set; }
public int Score { get; set; }
}
static class Program
{
static IEnumerable<string> ReadLines(string path)
{
using (StreamReader reader = File.OpenText(path))
{
string line;
while ((line = reader.ReadLine()) != null)
{
yield return line;
}
}
}
static IEnumerable<Record> Parse(string path)
{
foreach (string line in ReadLines(path))
{
string[] segments = line.Split(',');
Gender gender;
switch(segments[1]) {
case "m": gender = Gender.Male; break;
case "f": gender = Gender.Female; break;
default: gender = Gender.Unknown; break;
}
yield return new Record
{
Name = segments[0],
Gender = gender,
Score = int.Parse(segments[2])
};
}
}
static void Main()
{
Record best = null;
foreach (Record record in Parse("data.txt"))
{
if (record.Gender != Gender.Male) continue;
if (best == null || record.Score > best.Score)
{
best = record;
}
}
Console.WriteLine("{0}: {1}", best.Name, best.Score);
}
}
The advantage of writing things as iterators is that you can easily use either streaming or buffering - for example, you can do:
List<Record> data = new List<Record>(Parse("data.txt"));
and then manipulate data all day long (assuming it isn't too large) - useful for multiple aggregates, mutating data, etc.

This question asks how to find a maximal element by a certain criterion. Combine that with Marc's LINQ part and you're away.

In the real world, of course, these would be records in a database, and you would use one line of SQL to select the best record, ie:
SELECT Name, Score FROM Grades WHERE Score = MAX(Score)
(This returns more than one record where there's more than one best record, of course.) This is an example of the power of using the right tool for the job.

I think the fastest and least-code way would be to transform the txt to xml and then use Linq2Xml to select from it. Here's a link.
Edit: That might be more work than you'd like to do. Another option is to create a class called AcademicRecord that has properties for the persons name gender etc. Then when you read the file, add to a List for each line in the file. Then use a Sort predicate to sort the list; the highest record would then be the first one in the list. Here's a link.

Your assignment might have different requirements, but if you only want to get "best male name and grade" from a file you described, a compact way is:
public String FindRecord()
{
String[] lines = File.ReadAllLines("MyFile.csv");
Array.Sort(lines, CompareByBestMaleName);
return lines[0];
}
int SortByBestMaleName(String a, String b)
{
String[] ap = a.Split();
String[] bp = b.Split();
// Always rank male higher
if (ap[1] == "m" && bp[1] == "f") { return 1; }
if (ap[1] == "f" && bp[1] == "m") { return -1; }
// Compare by score
return int.Parse(ap[2]).CompareTo(int.Parse(bp[2]));
}
Note that this is neither fast nor robust.

We Keep Coding

C# (C-Sharp) is a programming language developed by Microsoft that runs on the .NET Framework.

C# - A faster alternative to Convert.ToSingle() - c#

Related

Array.Contains() is always false

Importing and removing duplicates from a massive amount of text files using C# and Redis

Parsing mesh with more than 65k-vertices in Unity at runtime

Generate multiple unique strings in C#

getting the best record from a file

Categories

Resources