I've been smashing my head against this problem for days and have tried a tons of different things. I've been all over the forums, tried everything I've seen with no luck. My issue could be that I don't have an override, but I can't figure out how to get that to work.
I want to check if an array of 5,000+ elements contains a user-entered word. The word gets entered character by character and combined into a string(guessString). And then I use .Contains() to see if that word is in an array.
***EDIT please see screenshots for debug logs WordArray Elements -- Debug Output -- Debug With whitespace detection -- Code that doesnt work
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using TMPro;
using UnityEngine.EventSystems;
using UnityEngine.UI;
using UnityEngine.SceneManagement;
using System.Linq;
public class WordAction : MonoBehaviour
{
TMP_Text m_textComponent;
TMP_Text m_currentSquare;
public TMP_Text[] squareArray;
List<string> dupKey = new List<string>();
public string[] WordArray;
public List<string> DictionaryList = new List<string>();
public TextAsset file;
[SerializeField]
Color correctColor, wrongColor, maybeColor;
[SerializeField]
float colorFadeSpeed, colorFadeTime; // 2f, 1f
public float waitTime;
public string levelID;
public string key;
public AudioSource revealSFX;
bool guess;
string guessString;
int guessRegulator;
int guessCount = 1;
int lessGuessCount; // Starts variable at value of first current row element
int maxGuessCount;
string[] guessStringArray = new string[1];
void Start()
{
for (int i = 0; i < 5; i++) // Duplicate key
{
dupKey.Add(key[i].ToString());
}
var content = file.text;
string[] AllWords = content.Split('\n');
WordArray = AllWords;
}
public void Magic()
{
StartCoroutine(CompareKey());
}
IEnumerator CompareKey()
{
guessRegulator++;
GuessRegulatorFunction();
lessGuessCount = (guessCount * 5) - 5; // Starts variable at value of first current row element
maxGuessCount = guessCount * 5;
guessCount++; // Moves to next row
int k = 0; // Indexer for key[]
int cW = 0; // Indexer for CombineWord()
GameObject keyGO; // Keyboard GO
for (int i = lessGuessCount; i < maxGuessCount; i++)
{
if (cW < 1)
{
CombineWord(i);
cW++;
}
bool match = WordArray.Contains(guessString); // not working
Debug.Log(match);
if (match)
{
//do stuff
}
//compare stuff
string guessStr = squareArray[i].text.ToString();
string keyStr = key[k].ToString();
bool result = guessStr == keyStr;
if (!result && !dupKey.Contains(guessStr))
{
//wrong stuff
GameObject parentGO = squareArray[i].transform.parent.gameObject; // Gets parent of SquareArray element
Image parentImage = parentGO.GetComponent<Image>(); // Gets Image component of parent game object
keyGO = GameObject.Find(squareArray[i].text); // Keyboard
Image keyParentImage = keyGO.GetComponent<Image>(); // Keyboard
wrongColor.a = 255;
keyParentImage.color = wrongColor;
parentImage.color = wrongColor;
yield return null;
}
if (result)
{
//correct stuff
dupKey[k] = "";
GameObject parentGO = squareArray[i].transform.parent.gameObject; // Gets parent of SquareArray element
Image parentImage = parentGO.GetComponent<Image>(); // Gets Image component of parent game object
keyGO = GameObject.Find(squareArray[i].text); // Keyboard
Image keyParentImage = keyGO.GetComponent<Image>(); // Keyboard
correctColor.a = 255;
keyParentImage.color = correctColor;
parentImage.color = correctColor;
yield return null;
}
if (!result && dupKey.Contains(guessStr))
{
//yellow stuff
for (int x = 0; x < 5; x++) // Duplicate key
{
if (guessStr == dupKey[x])
{
dupKey[x] = "";
}
}
GameObject parentGO = squareArray[i].transform.parent.gameObject; // Gets parent of SquareArray element
Image parentImage = parentGO.GetComponent<Image>(); // Gets Image component of parent game object
keyGO = GameObject.Find(squareArray[i].text); // Keyboard
Image keyParentImage = keyGO.GetComponent<Image>(); // Keyboard
maybeColor.a = 255;
keyParentImage.color = maybeColor;
parentImage.color = maybeColor;
yield return null;
}
revealSFX.Play();
k++;
yield return new WaitForSeconds(waitTime);
}
dupKey.Clear();
for (int i = 0; i < 5; i++) // Duplicate key
{
dupKey.Add(key[i].ToString());
}
}
void GuessRegulatorFunction()
{
guessRegulator++; // Stops multiple guess attempts
for (int i = 0; i < (guessCount * 5); i++) // Checks if row is blank when guessing
{
if (squareArray[i].text == "")
{
guess = false;
guessRegulator = 0; // Resets guess regulator
break;
}
else
{
guess = true;
}
}
if (guessRegulator > 1 || guess == false) // Stops multiple guess attempts
{
return;
}
}
public void BackSpace()
{
for (int i = guessCount * 5; i > (guessCount * 5) - 6; i--)
{
if (squareArray[i].text != "")
{
squareArray[i].text = "";
break;
}
}
}
public void InputLetter()
{
guessRegulator = 0;
for (int i = 0; i < guessCount * 5; i++)
{
if (squareArray[i].text == "")
{
squareArray[i].text = EventSystem.current.currentSelectedGameObject.name.ToString();
break;
}
}
}
void CombineWord(int i)
{
var string1 = squareArray[i].text.ToString();
var string2 = squareArray[i + 1].text.ToString();
var string3 = squareArray[i + 2].text.ToString();
var string4 = squareArray[i + 3].text.ToString();
var string5 = squareArray[i + 4].text.ToString();
guessString = string1 + string2 + string3 + string4 + string5;
//Debug.Log(guessString);
}
}
I've taken your line of code that isn't working and copied it verbatim. I've then taken the data that you say is in the WordArray and guessString variables and set those up. Then I ran this:
var WordArray = new [] { "WHICH", "THERE", "THEIR", "ABOUT" };
var guessString= "THERE";
bool match = WordArray.Contains(guessString);
Console.WriteLine(match);
match comes out True.
Your variables do not contain the data you think they do.
It's likely that the content that you call .Split('\n') on actually contains Windows end of line markers, so a combination of "\r\n". Since you only split on '\n' it's likely that the '\r' remains in your strings and hence "THERE" does not match "THERE\r".
Try this instead:
.Split(new[] { Environment.NewLine }, StringSplitOptions.None);
If your code is run on either Windows or on Linux the above line works. Just watch out for files that mix the endings.
Well, your WordArray is always empty. You put your file content into a local variable called AllWords.
Your word will never be found in an empty array.
Related
i am leanring about custom editor and i have a problem is about SerializeObject, i create a 2D array
toggles and i found they all not working when i click them.I try anything I can do, but it still not work
Can anyone help me?
public override void OnInspectorGUI()
{
EditorGUI.BeginChangeCheck();
serializedObject.Update();
for (int i = 0; i < 9; i++)
{
EditorGUILayout.BeginHorizontal();
GUIStyle s1 = new GUIStyle();
s1.padding.top = 20;
s1.padding.left = 2;
if (i == 0)
{
EditorGUILayout.LabelField("Site " + (i+1).ToString(), s1, GUILayout.MaxWidth(40));
}
else
{
EditorGUILayout.LabelField("Site " + (i+1).ToString(), GUILayout.MaxWidth(40));
}
for (int j = 0; j < 9; j++)
{
SerializedProperty r = TheNewList.GetArrayElementAtIndex(i).FindPropertyRelative("road").GetArrayElementAtIndex(j);
SerializedProperty w = TheNewList.GetArrayElementAtIndex(i).FindPropertyRelative("weight").GetArrayElementAtIndex(j);
serializedObject.Update();
EditorGUILayout.BeginVertical();
if (i == 0)
{
EditorGUILayout.LabelField((j+1).ToString(), GUILayout.MaxWidth(10));
}
r.boolValue = EditorGUILayout.Toggle(m_Target.roadEditor[i].road[j]);
if (setWeight)
{
w.intValue = (int)EditorGUILayout.IntField((int)m_Target.roadEditor[i].weight[j], GUILayout.MaxWidth(21));
}
EditorGUILayout.EndVertical();
}
EditorGUILayout.EndHorizontal();
EditorGUILayout.Space();
}
serializedObject.ApplyModifiedProperties();
}
}
Remove the additional
serializedObject.Update();
in the for loop!
This loads in the current values into the serialized properties in every iteration. This means even if you change a value which is not the very last toggle the current value gets reloaded before the Editor reaches the line
serializedObject.ApplyModifiedProperties();
General note: Do not mix SerializedProperties and the direct access to your class fields via the target!
Best don't use the target at all ... there are some exceptions where you want to call a method or something but in general stick to only use SerializedProperty!
In your case you could simply do
public override void OnInspectorGUI()
{
// A little bonus see below
DrawScriptField();
// Note: you only want to begin a change check if you also end it somewhere
// with the according if(EditorGUI.EndChangeCheck()) { ... } block
//EditorGUI.BeginchangeCheck();
// This you only want to do once for each OnInspectorGUI iteration
serializedObject.Update();
for (var i = 0; i < 9; i++)
{
// Get all these only ONCE instead of in every iteration of the inner loop
var element = TheNewList.GetArrayElementAtIndex(i);
var road = element.FindPropertyRelative("road");
var weight = element.FindPropertyRelative("weight");
// I like to wrap my code blocks in { } when possible in order to keep
// it a bit cleaner. Does only owrk ofcourse if you don't declare variables
// you need later. On this way I don't forget to end the draw groups
EditorGUILayout.BeginHorizontal();
{
var s1 = new GUIStyle { padding = { top = 20, left = 2 } };
if (i == 0)
{
EditorGUILayout.LabelField("Site " + (i + 1), s1, GUILayout.MaxWidth(40));
}
else
{
EditorGUILayout.LabelField("Site " + (i + 1), GUILayout.MaxWidth(40));
}
for (var j = 0; j < 9; j++)
{
var r = road.GetArrayElementAtIndex(j);
var w = weight.GetArrayElementAtIndex(j);
EditorGUILayout.BeginVertical();
{
if (i == 0)
{
EditorGUILayout.LabelField((j + 1).ToString(), GUILayout.MaxWidth(10));
}
// By simply using PropertyField the Inspector automatically
// - choses the correct drawer according to the property type
// - reads and writes the value to the serializedproperty and handles undo/redo etc
EditorGUILayout.PropertyField(r, GUIContent.none, GUILayout.MaxWidth(10));
if (setWeight)
{
EditorGUILayout.PropertyField(w, GUIContent.none, GUILayout.MaxWidth(21));
}
}
EditorGUILayout.EndVertical();
}
}
EditorGUILayout.EndHorizontal();
EditorGUILayout.Space();
}
serializedObject.ApplyModifiedProperties();
}
// This you get as a little bonus from my side ;)
private void DrawScriptField()
{
EditorGUI.BeginDisabledGroup(true);
EditorGUILayout.ObjectField("Script", MonoScript.FromMonoBehaviour((YOURSCRIPTTYPE)target), typeof(YOURSCRIPTTYPE), false);
EditorGUI.EndDisabledGroup();
EditorGUILayout.Space();
}
i need help for fix my code from the lesson i get, i try to create simple script for scramble words in sentences like "the house is broken" became "broken the is house"..my code work as well but it scramble only by one word, like "THE" became "H.T.E",
i try to use string.split method but i dont understand where i must change the code was into array.
here my code and the result is
using UnityEngine;
using System.Collections;
using System.Collections.Generic;
using UnityEngine.UI;
[System.Serializable]
public class Word
{
public string word;
[Header("biarkan kosong untuk acak otomatis")]
public string desiredRandom;
public string GetString()
{
if (!string.IsNullOrEmpty(desiredRandom))
{
return desiredRandom;
}
string result = word;
// **I'm try to split string here where i try to input that into array**
string[] array = result.Split(' ');
foreach (string token in array)
{
Debug.Log((token).ToString());
word = token;
}
result = "";
List<char> characters = new List<char>(word.ToCharArray());
while (characters.Count > 0)
{
int indexChar = Random.Range(0, characters.Count - 1);
result += characters[indexChar];
characters.RemoveAt(indexChar);
}
return result;
}
}
public class WordScramble : MonoBehaviour
{
public Word[] words;
[Header("UI Reference")]
public CharObject prefab;
public Transform container;
public float space;
public float lerpSpeed = 5;
List<CharObject> charObjects = new List<CharObject>();
CharObject firstSelected;
public int currentWord;
public static WordScramble main;
void Awake()
{
main = this;
}
// Use this for initialization
void Start()
{
ShowScramble(currentWord);
}
// Update is called once per frame
void Update()
{
RepositionObject();
}
void RepositionObject()
{
if (charObjects.Count == 0)
{
return;
}
float center = (charObjects.Count - 1) / 2;
for (int i = 0; i < charObjects.Count; i++)
{
charObjects[i].rectTransform.anchoredPosition
= Vector2.Lerp(charObjects[i].rectTransform.anchoredPosition,
new Vector2((i - center) * space, 0), lerpSpeed * Time.deltaTime);
charObjects[i].index = i;
}
}
public void ShowScramble()
{
ShowScramble(Random.Range(0, words.Length - 1));
}
public void ShowScramble(int index)
{
charObjects.Clear();
foreach (Transform child in container)
{
Destroy(child.gameObject);
}
if (index > words.Length - 1)
{
Debug.LogError("index out of range between 0-" + (words.Length - 1).ToString());
return;
}
// string result = huruf ;
// foreach (string words is word());
char[] chars = words[index].GetString().ToCharArray();
foreach (char c in chars)
{
CharObject clone = Instantiate(prefab.gameObject).GetComponent<CharObject>();
clone.transform.SetParent(container);
charObjects.Add(clone.Init(c));
}
currentWord = index;
}
public void Swap(int indexA, int indexB)
{
CharObject tmpA = charObjects[indexA];
charObjects[indexA] = charObjects[indexB];
charObjects[indexB] = tmpA;
charObjects[indexA].transform.SetAsLastSibling();
charObjects[indexB].transform.SetAsLastSibling();
CheckWord();
}
public void Select(CharObject charObject)
{
if (firstSelected)
{
Swap(firstSelected.index, charObject.index);
// unselect
//firstSelected = null;
firstSelected.Select();
charObject.Select();
}
else
{
firstSelected = charObject;
}
}
public void UnSelect()
{
firstSelected = null;
}
public void CheckWord()
{
StartCoroutine(CoCheckWord());
}
IEnumerator CoCheckWord()
{
yield return new WaitForSeconds(0.5f);
string word = "";
foreach (CharObject charObject in charObjects)
{
word += charObject.character;
}
if (word == words[currentWord].word)
{
currentWord++;
ShowScramble(currentWord);
}
}
}
this for a result
maybe i can get help for this problem, i'm still learn for C#, i'm sorry if my code is mess up
Your code is assigning the splits in a loop to the word variable over and over, overwriting word at each time. Then you are splitting word into its chars and try to shuffle them. You should instead shuffle the array of splits.
Example using LINQ:
public static string[] ScrambleSentence(string sentence)
{
var random = new Random();
return sentence.Split(' ').OrderBy(x => random.Next()).ToArray();
}
Here is a simple way to randomize word positions in a text
var rd = new Random();
string[] words = text.Split(' ').OrderBy(w => rd.Next()).ToArray();
// If you want a simple string instead of an array of words
string rdText = string.Join(" ", words);
It tooks a lot of time to unfreeze (but sometimes it just crashes).
I was debugging whole code to find a place where the problem is and I found that there's something wrong while I was saving things to Player Prefs.
My "idea" why it is causing. Multiple times getting and setting prefs with different values.
Test code:
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
public class test : MonoBehaviour
{
[SerializeField]
private Image bg;
[SerializeField]
private Text text;
//private void Start()
//{
// PlayerPrefs.SetString("dd", "x");
//}
//Update is called once per frame
//bool was = false;
int counter = 0;
System.DateTime lastUpdate;
bool active = false;
List<string> tmp = new List<string>() { "5", "7" };
private void Start()
{
lastUpdate = System.DateTime.UtcNow;
}
void Update()
{
System.DateTime now = System.DateTime.UtcNow;
System.TimeSpan diff = now - lastUpdate;
text.text = diff.ToString();
if (diff.TotalSeconds > 10)
{
active = !active;
lastUpdate = now;
}
if (active)
{
bg.color = Color.green;
//if ((System.DateTime.UtcNow - xyz).TotalSeconds > 5)
//if (counter > 5)
//{
//if (!was)
//{
// Debug.Log("XX_update");
//}
//was = true;
//string x = PlayerPrefs.GetString("dd");
for (int i = 0; i < 20; i++)
{
string x = PlayerPrefs.GetString("dd");
Set(5, i);
//string x = PlayerPrefs.GetString(CommonMethods.RandomString(5));
}
//}
//else
//{
// was = false;
// counter++;
//}
}
else
{
bg.color = Color.red;
}
}
private System.DateTime xyz = System.DateTime.UtcNow;
public void OnApplicationFocus(bool focus)
{
counter = 0;
Debug.Log("XX_onappfocus");
xyz = System.DateTime.UtcNow;
}
public void Set(double value, int x = 0)
{
//PlayerPrefs.SetString(name, value.ToString("E3"));
//string costam = random.NextDouble().ToString();
//PlayerPrefs.SetString(name, costam);
//UnityEngine.Debug.Log("XX_" + costam);
//List<string> tmp = new List<string>() { "98754.38238715351", "98754.38238415351" };
//string result = tmp.RandomElement();
string result = tmp[x % 2];
//PlayerPrefs.SetString(name, result);
PlayerPrefs.SetString("dd", result);
//PlayerPrefs.SetString(name, "1.054E+003");
}
}
I was thinking that maybe there's a problem with double's precision, but it wasn't. Then I checked if it is caused by parsing inside setstring methods, but it wasn't. Problem isn't connected with a key, so it can be random.
If you want to check this try to build this code with 2 different values in tmp string list (now it's 5 and 7) and there will be bug, then change these values to the same (f.e. 5 and 5). Problem will disappear. Also you can delete get or set from FOR loop and problem will also disappear.
I also made a tricky switch inside Update method to check if it is connected with this get / set pair so you can check it too.
Sorry for my english guys but this problem is so complex that I even don't know how to describe it. I am trying to solve it for 3 days (we need to solve this or make a 'walk around' because we can't use that app without this).
Thanks in advance.
This is a bit of a doozy and it's been a while since I worked with C#, so bear with me:
I'm running a jruby script to iterate through 900 files (5 Mb - 1500 Mb in size) to figure out how many dupes STILL exist within these (already uniq'd) files. I had little luck with awk.
My latest idea was to insert them into a local MongoDB instance like so:
db.collection('hashes').update({ :_id => hash}, { $inc: { count: 1} }, { upsert: true)
... so that later I could just query it like db.collection.where({ count: { $gt: 1 } }) to get all the dupes.
This is working great except it's been over 24 hours and at the time of writing I'm at 72,532,927 Mongo entries and growing.
I think Ruby's .each_line is bottlnecking the IO hardcore:
So what I'm thinking now is compiling a C# program which fires up a thread PER EACH FILE and inserts the line (md5 hash) into a Redis list.
From there, I could have another compiled C# program simply pop the values off and ignore the save if the count is 1.
So the questions are:
Will using a compiled file reader and multithreading the file reads significantly improve performance?
Is using Redis even necessary? With a tremendous amount of AWS memory, could I not just use the threads to fill some sort of a list atomically and proceed from there?
Thanks in advance.
Updated
New solution. Old solution. The main idea is to calculate dummy hashes(just sum of all chars in string) of each line and store it in Dictionary<ulong, List<LinePosition>> _hash2LinePositions. It's possible to have multiple hashes in the same stream and it solves by List in Dictionary Value. When the hashes are the same, we read and compare the strings from the streams. LinePosition is using for storing info about line - position in stream and its length. I don't have such huge files as you, but my tests shows that it works. Here is the full code:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
public class Solution
{
struct LinePosition
{
public long Start;
public long Length;
public LinePosition(long start, long count)
{
Start = start;
Length = count;
}
public override string ToString()
{
return string.Format("Start: {0}, Length: {1}", Start, Length);
}
}
class TextFileHasher : IDisposable
{
readonly Dictionary<ulong, List<LinePosition>> _hash2LinePositions;
readonly Stream _stream;
bool _isDisposed;
public HashSet<ulong> Hashes { get; private set; }
public string Name { get; private set; }
public TextFileHasher(string name, Stream stream)
{
Name = name;
_stream = stream;
_hash2LinePositions = new Dictionary<ulong, List<LinePosition>>();
Hashes = new HashSet<ulong>();
}
public override string ToString()
{
return Name;
}
public void CalculateFileHash()
{
int readByte = -1;
ulong dummyLineHash = 0;
// Line start position in file
long startPosition = 0;
while ((readByte = _stream.ReadByte()) != -1) {
// Read until new line
if (readByte == '\r' || readByte == '\n') {
// If there was data
if (dummyLineHash != 0) {
// Add line hash and line position to the dict
AddToDictAndHash(dummyLineHash, startPosition, _stream.Position - 1 - startPosition);
// Reset line hash
dummyLineHash = 0;
}
}
else {
// Was it new line ?
if (dummyLineHash == 0)
startPosition = _stream.Position - 1;
// Calculate dummy hash
dummyLineHash += (uint)readByte;
}
}
if (dummyLineHash != 0) {
// Add line hash and line position to the dict
AddToDictAndHash(dummyLineHash, startPosition, _stream.Position - startPosition);
// Reset line hash
dummyLineHash = 0;
}
}
public List<LinePosition> GetLinePositions(ulong hash)
{
return _hash2LinePositions[hash];
}
public List<string> GetDuplicates()
{
List<string> duplicates = new List<string>();
foreach (var key in _hash2LinePositions.Keys) {
List<LinePosition> linesPos = _hash2LinePositions[key];
if (linesPos.Count > 1) {
duplicates.AddRange(FindExactDuplicates(linesPos));
}
}
return duplicates;
}
public void Dispose()
{
if (_isDisposed)
return;
_stream.Dispose();
_isDisposed = true;
}
private void AddToDictAndHash(ulong hash, long start, long count)
{
List<LinePosition> linesPosition;
if (!_hash2LinePositions.TryGetValue(hash, out linesPosition)) {
linesPosition = new List<LinePosition>() { new LinePosition(start, count) };
_hash2LinePositions.Add(hash, linesPosition);
}
else {
linesPosition.Add(new LinePosition(start, count));
}
Hashes.Add(hash);
}
public byte[] GetLineAsByteArray(LinePosition prevPos)
{
long len = prevPos.Length;
byte[] lineBytes = new byte[len];
_stream.Seek(prevPos.Start, SeekOrigin.Begin);
_stream.Read(lineBytes, 0, (int)len);
return lineBytes;
}
private List<string> FindExactDuplicates(List<LinePosition> linesPos)
{
List<string> duplicates = new List<string>();
linesPos.Sort((x, y) => x.Length.CompareTo(y.Length));
LinePosition prevPos = linesPos[0];
for (int i = 1; i < linesPos.Count; i++) {
if (prevPos.Length == linesPos[i].Length) {
var prevLineArray = GetLineAsByteArray(prevPos);
var thisLineArray = GetLineAsByteArray(linesPos[i]);
if (prevLineArray.SequenceEqual(thisLineArray)) {
var line = System.Text.Encoding.Default.GetString(prevLineArray);
duplicates.Add(line);
}
#if false
string prevLine = System.Text.Encoding.Default.GetString(prevLineArray);
string thisLine = System.Text.Encoding.Default.GetString(thisLineArray);
Console.WriteLine("PrevLine: {0}\r\nThisLine: {1}", prevLine, thisLine);
StringBuilder sb = new StringBuilder();
sb.Append(prevPos);
sb.Append(" is '");
sb.Append(prevLine);
sb.Append("'. ");
sb.AppendLine();
sb.Append(linesPos[i]);
sb.Append(" is '");
sb.Append(thisLine);
sb.AppendLine("'. ");
sb.Append("Equals => ");
sb.Append(prevLine.CompareTo(thisLine) == 0);
Console.WriteLine(sb.ToString());
#endif
}
else {
prevPos = linesPos[i];
}
}
return duplicates;
}
}
public static void Main(String[] args)
{
List<TextFileHasher> textFileHashers = new List<TextFileHasher>();
string text1 = "abc\r\ncba\r\nabc";
TextFileHasher tfh1 = new TextFileHasher("Text1", new MemoryStream(System.Text.Encoding.Default.GetBytes(text1)));
tfh1.CalculateFileHash();
textFileHashers.Add(tfh1);
string text2 = "def\r\ncba\r\nwet";
TextFileHasher tfh2 = new TextFileHasher("Text2", new MemoryStream(System.Text.Encoding.Default.GetBytes(text2)));
tfh2.CalculateFileHash();
textFileHashers.Add(tfh2);
string text3 = "def\r\nbla\r\nwat";
TextFileHasher tfh3 = new TextFileHasher("Text3", new MemoryStream(System.Text.Encoding.Default.GetBytes(text3)));
tfh3.CalculateFileHash();
textFileHashers.Add(tfh3);
List<string> totalDuplicates = new List<string>();
Dictionary<ulong, Dictionary<TextFileHasher, List<LinePosition>>> totalHashes = new Dictionary<ulong, Dictionary<TextFileHasher, List<LinePosition>>>();
textFileHashers.ForEach(tfh => {
foreach(var dummyHash in tfh.Hashes) {
Dictionary<TextFileHasher, List<LinePosition>> tfh2LinePositions = null;
if (!totalHashes.TryGetValue(dummyHash, out tfh2LinePositions))
totalHashes[dummyHash] = new Dictionary<TextFileHasher, List<LinePosition>>() { { tfh, tfh.GetLinePositions(dummyHash) } };
else {
List<LinePosition> linePositions = null;
if (!tfh2LinePositions.TryGetValue(tfh, out linePositions))
tfh2LinePositions[tfh] = tfh.GetLinePositions(dummyHash);
else
linePositions.AddRange(tfh.GetLinePositions(dummyHash));
}
}
});
HashSet<TextFileHasher> alreadyGotDuplicates = new HashSet<TextFileHasher>();
foreach(var hash in totalHashes.Keys) {
var tfh2LinePositions = totalHashes[hash];
var tfh = tfh2LinePositions.Keys.FirstOrDefault();
// Get duplicates in the TextFileHasher itself
if (tfh != null && !alreadyGotDuplicates.Contains(tfh)) {
totalDuplicates.AddRange(tfh.GetDuplicates());
alreadyGotDuplicates.Add(tfh);
}
if (tfh2LinePositions.Count <= 1) {
continue;
}
// Algo to get duplicates in more than 1 TextFileHashers
var tfhs = tfh2LinePositions.Keys.ToArray();
for (int i = 0; i < tfhs.Length; i++) {
var tfh1Positions = tfhs[i].GetLinePositions(hash);
for (int j = i + 1; j < tfhs.Length; j++) {
var tfh2Positions = tfhs[j].GetLinePositions(hash);
for (int k = 0; k < tfh1Positions.Count; k++) {
var tfh1Pos = tfh1Positions[k];
var tfh1ByteArray = tfhs[i].GetLineAsByteArray(tfh1Pos);
for (int m = 0; m < tfh2Positions.Count; m++) {
var tfh2Pos = tfh2Positions[m];
if (tfh1Pos.Length != tfh2Pos.Length)
continue;
var tfh2ByteArray = tfhs[j].GetLineAsByteArray(tfh2Pos);
if (tfh1ByteArray.SequenceEqual(tfh2ByteArray)) {
var line = System.Text.Encoding.Default.GetString(tfh1ByteArray);
totalDuplicates.Add(line);
}
}
}
}
}
}
Console.WriteLine();
if (totalDuplicates.Count > 0) {
Console.WriteLine("Total number of duplicates: {0}", totalDuplicates.Count);
Console.WriteLine("#######################");
totalDuplicates.ForEach(x => Console.WriteLine("{0}", x));
Console.WriteLine("#######################");
}
// Free resources
foreach (var tfh in textFileHashers)
tfh.Dispose();
}
}
If you have tons of ram... You guys are overthinking it...
var fileLines = File.ReadAllLines(#"c:\file.csv").Distinct();
I'm working on a program which reads millions of floating point numbers from a text file. This program runs inside of a game that I'm designing, so I need it to be fast (I'm loading an obj file). So far, loading a relatively small file takes about a minute (without precompilation) because of the slow speed of Convert.ToSingle(). Is there a faster way to do this?
EDIT: Here's the code I use to parse the Obj file
http://pastebin.com/TfgEge9J
using System;
using System.IO;
using System.Collections.Generic;
using OpenTK.Math;
using System.Drawing;
using PlatformLib;
public class ObjMeshLoader
{
public static StreamReader[] LoadMeshes(string fileName)
{
StreamReader mreader = new StreamReader(PlatformLib.Platform.openFile(fileName));
MemoryStream current = null;
List<MemoryStream> mstreams = new List<MemoryStream>();
StreamWriter mwriter = null;
if (!mreader.ReadLine().Contains("#"))
{
mreader.BaseStream.Close();
throw new Exception("Invalid header");
}
while (!mreader.EndOfStream)
{
string cmd = mreader.ReadLine();
string line = cmd;
line = line.Trim(splitCharacters);
line = line.Replace(" ", " ");
string[] parameters = line.Split(splitCharacters);
if (parameters[0] == "mtllib")
{
loadMaterials(parameters[1]);
}
if (parameters[0] == "o")
{
if (mwriter != null)
{
mwriter.Flush();
current.Position = 0;
}
current = new MemoryStream();
mwriter = new StreamWriter(current);
mwriter.WriteLine(parameters[1]);
mstreams.Add(current);
}
else
{
if (mwriter != null)
{
mwriter.WriteLine(cmd);
mwriter.Flush();
}
}
}
mwriter.Flush();
current.Position = 0;
List<StreamReader> readers = new List<StreamReader>();
foreach (MemoryStream e in mstreams)
{
e.Position = 0;
StreamReader sreader = new StreamReader(e);
readers.Add(sreader);
}
return readers.ToArray();
}
public static bool Load(ObjMesh mesh, string fileName)
{
try
{
using (StreamReader streamReader = new StreamReader(Platform.openFile(fileName)))
{
Load(mesh, streamReader);
streamReader.Close();
return true;
}
}
catch { return false; }
}
public static bool Load2(ObjMesh mesh, StreamReader streamReader, ObjMesh prevmesh)
{
if (prevmesh != null)
{
//mesh.Vertices = prevmesh.Vertices;
}
try
{
//streamReader.BaseStream.Position = 0;
Load(mesh, streamReader);
streamReader.Close();
#if DEBUG
Console.WriteLine("Loaded "+mesh.Triangles.Length.ToString()+" triangles and"+mesh.Quads.Length.ToString()+" quadrilaterals parsed, with a grand total of "+mesh.Vertices.Length.ToString()+" vertices.");
#endif
return true;
}
catch (Exception er) { Console.WriteLine(er); return false; }
}
static char[] splitCharacters = new char[] { ' ' };
static List<Vector3> vertices;
static List<Vector3> normals;
static List<Vector2> texCoords;
static Dictionary<ObjMesh.ObjVertex, int> objVerticesIndexDictionary;
static List<ObjMesh.ObjVertex> objVertices;
static List<ObjMesh.ObjTriangle> objTriangles;
static List<ObjMesh.ObjQuad> objQuads;
static Dictionary<string, Bitmap> materials = new Dictionary<string, Bitmap>();
static void loadMaterials(string path)
{
StreamReader mreader = new StreamReader(Platform.openFile(path));
string current = "";
bool isfound = false;
while (!mreader.EndOfStream)
{
string line = mreader.ReadLine();
line = line.Trim(splitCharacters);
line = line.Replace(" ", " ");
string[] parameters = line.Split(splitCharacters);
if (parameters[0] == "newmtl")
{
if (materials.ContainsKey(parameters[1]))
{
isfound = true;
}
else
{
current = parameters[1];
}
}
if (parameters[0] == "map_Kd")
{
if (!isfound)
{
string filename = "";
for (int i = 1; i < parameters.Length; i++)
{
filename += parameters[i];
}
string searcher = "\\" + "\\";
filename.Replace(searcher, "\\");
Bitmap mymap = new Bitmap(filename);
materials.Add(current, mymap);
isfound = false;
}
}
}
}
static float parsefloat(string val)
{
return Convert.ToSingle(val);
}
int remaining = 0;
static string GetLine(string text, ref int pos)
{
string retval = text.Substring(pos, text.IndexOf(Environment.NewLine, pos));
pos = text.IndexOf(Environment.NewLine, pos);
return retval;
}
static void Load(ObjMesh mesh, StreamReader textReader)
{
//try {
//vertices = null;
//objVertices = null;
if (vertices == null)
{
vertices = new List<Vector3>();
}
if (normals == null)
{
normals = new List<Vector3>();
}
if (texCoords == null)
{
texCoords = new List<Vector2>();
}
if (objVerticesIndexDictionary == null)
{
objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>();
}
if (objVertices == null)
{
objVertices = new List<ObjMesh.ObjVertex>();
}
objTriangles = new List<ObjMesh.ObjTriangle>();
objQuads = new List<ObjMesh.ObjQuad>();
mesh.vertexPositionOffset = vertices.Count;
string line;
string alltext = textReader.ReadToEnd();
int pos = 0;
while ((line = GetLine(alltext, pos)) != null)
{
if (line.Length < 2)
{
break;
}
//line = line.Trim(splitCharacters);
//line = line.Replace(" ", " ");
string[] parameters = line.Split(splitCharacters);
switch (parameters[0])
{
case "usemtl":
//Material specification
try
{
mesh.Material = materials[parameters[1]];
}
catch (KeyNotFoundException)
{
Console.WriteLine("WARNING: Texture parse failure: " + parameters[1]);
}
break;
case "p": // Point
break;
case "v": // Vertex
float x = parsefloat(parameters[1]);
float y = parsefloat(parameters[2]);
float z = parsefloat(parameters[3]);
vertices.Add(new Vector3(x, y, z));
break;
case "vt": // TexCoord
float u = parsefloat(parameters[1]);
float v = parsefloat(parameters[2]);
texCoords.Add(new Vector2(u, v));
break;
case "vn": // Normal
float nx = parsefloat(parameters[1]);
float ny = parsefloat(parameters[2]);
float nz = parsefloat(parameters[3]);
normals.Add(new Vector3(nx, ny, nz));
break;
case "f":
switch (parameters.Length)
{
case 4:
ObjMesh.ObjTriangle objTriangle = new ObjMesh.ObjTriangle();
objTriangle.Index0 = ParseFaceParameter(parameters[1]);
objTriangle.Index1 = ParseFaceParameter(parameters[2]);
objTriangle.Index2 = ParseFaceParameter(parameters[3]);
objTriangles.Add(objTriangle);
break;
case 5:
ObjMesh.ObjQuad objQuad = new ObjMesh.ObjQuad();
objQuad.Index0 = ParseFaceParameter(parameters[1]);
objQuad.Index1 = ParseFaceParameter(parameters[2]);
objQuad.Index2 = ParseFaceParameter(parameters[3]);
objQuad.Index3 = ParseFaceParameter(parameters[4]);
objQuads.Add(objQuad);
break;
}
break;
}
}
//}catch(Exception er) {
// Console.WriteLine(er);
// Console.WriteLine("Successfully recovered. Bounds/Collision checking may fail though");
//}
mesh.Vertices = objVertices.ToArray();
mesh.Triangles = objTriangles.ToArray();
mesh.Quads = objQuads.ToArray();
textReader.BaseStream.Close();
}
public static void Clear()
{
objVerticesIndexDictionary = null;
vertices = null;
normals = null;
texCoords = null;
objVertices = null;
objTriangles = null;
objQuads = null;
}
static char[] faceParamaterSplitter = new char[] { '/' };
static int ParseFaceParameter(string faceParameter)
{
Vector3 vertex = new Vector3();
Vector2 texCoord = new Vector2();
Vector3 normal = new Vector3();
string[] parameters = faceParameter.Split(faceParamaterSplitter);
int vertexIndex = Convert.ToInt32(parameters[0]);
if (vertexIndex < 0) vertexIndex = vertices.Count + vertexIndex;
else vertexIndex = vertexIndex - 1;
//Hmm. This seems to be broken.
try
{
vertex = vertices[vertexIndex];
}
catch (Exception)
{
throw new Exception("Vertex recognition failure at " + vertexIndex.ToString());
}
if (parameters.Length > 1)
{
int texCoordIndex = Convert.ToInt32(parameters[1]);
if (texCoordIndex < 0) texCoordIndex = texCoords.Count + texCoordIndex;
else texCoordIndex = texCoordIndex - 1;
try
{
texCoord = texCoords[texCoordIndex];
}
catch (Exception)
{
Console.WriteLine("ERR: Vertex " + vertexIndex + " not found. ");
throw new DllNotFoundException(vertexIndex.ToString());
}
}
if (parameters.Length > 2)
{
int normalIndex = Convert.ToInt32(parameters[2]);
if (normalIndex < 0) normalIndex = normals.Count + normalIndex;
else normalIndex = normalIndex - 1;
normal = normals[normalIndex];
}
return FindOrAddObjVertex(ref vertex, ref texCoord, ref normal);
}
static int FindOrAddObjVertex(ref Vector3 vertex, ref Vector2 texCoord, ref Vector3 normal)
{
ObjMesh.ObjVertex newObjVertex = new ObjMesh.ObjVertex();
newObjVertex.Vertex = vertex;
newObjVertex.TexCoord = texCoord;
newObjVertex.Normal = normal;
int index;
if (objVerticesIndexDictionary.TryGetValue(newObjVertex, out index))
{
return index;
}
else
{
objVertices.Add(newObjVertex);
objVerticesIndexDictionary[newObjVertex] = objVertices.Count - 1;
return objVertices.Count - 1;
}
}
}
Based on your description and the code you've posted, I'm going to bet that your problem isn't with the reading, the parsing, or the way you're adding things to your collections. The most likely problem is that your ObjMesh.Objvertex structure doesn't override GetHashCode. (I'm assuming that you're using code similar to http://www.opentk.com/files/ObjMesh.cs.
If you're not overriding GetHashCode, then your objVerticesIndexDictionary is going to perform very much like a linear list. That would account for the performance problem that you're experiencing.
I suggest that you look into providing a good GetHashCode method for your ObjMesh.Objvertex class.
See Why is ValueType.GetHashCode() implemented like it is? for information about the default GetHashCode implementation for value types and why it's not suitable for use in a hash table or dictionary.
Edit 3: The problem is NOT with the parsing.
It's with how you read the file. If you read it properly, it would be faster; however, it seems like your reading is unusually slow. My original suspicion was that it was because of excess allocations, but it seems like there might be other problems with your code too, since that doesn't explain the entire slowdown.
Nevertheless, here's a piece of code I made that completely avoids all object allocations:
static void Main(string[] args)
{
long counter = 0;
var sw = Stopwatch.StartNew();
var sb = new StringBuilder();
var text = File.ReadAllText("spacestation.obj");
for (int i = 0; i < text.Length; i++)
{
int start = i;
while (i < text.Length &&
(char.IsDigit(text[i]) || text[i] == '-' || text[i] == '.'))
{ i++; }
if (i > start)
{
sb.Append(text, start, i - start); //Copy data to the buffer
float value = Parse(sb); //Parse the data
sb.Remove(0, sb.Length); //Clear the buffer
counter++;
}
}
sw.Stop();
Console.WriteLine("{0:N0}", sw.Elapsed.TotalSeconds); //Only a few ms
}
with this parser:
const int MIN_POW_10 = -16, int MAX_POW_10 = 16,
NUM_POWS_10 = MAX_POW_10 - MIN_POW_10 + 1;
static readonly float[] pow10 = GenerateLookupTable();
static float[] GenerateLookupTable()
{
var result = new float[(-MIN_POW_10 + MAX_POW_10) * 10];
for (int i = 0; i < result.Length; i++)
result[i] = (float)((i / NUM_POWS_10) *
Math.Pow(10, i % NUM_POWS_10 + MIN_POW_10));
return result;
}
static float Parse(StringBuilder str)
{
float result = 0;
bool negate = false;
int len = str.Length;
int decimalIndex = str.Length;
for (int i = len - 1; i >= 0; i--)
if (str[i] == '.')
{ decimalIndex = i; break; }
int offset = -MIN_POW_10 + decimalIndex;
for (int i = 0; i < decimalIndex; i++)
if (i != decimalIndex && str[i] != '-')
result += pow10[(str[i] - '0') * NUM_POWS_10 + offset - i - 1];
else if (str[i] == '-')
negate = true;
for (int i = decimalIndex + 1; i < len; i++)
if (i != decimalIndex)
result += pow10[(str[i] - '0') * NUM_POWS_10 + offset - i];
if (negate)
result = -result;
return result;
}
it happens in a small fraction of a second.
Of course, this parser is poorly tested and has these current restrictions (and more):
Don't try parsing more digits (decimal and whole) than provided for in the array.
No error handling whatsoever.
Only parses decimals, not exponents! i.e. it can parse 1234.56 but not 1.23456E3.
Doesn't care about globalization/localization. Your file is only in a single format, so there's no point caring about that kind of stuff because you're probably using English to store it anyway.
It seems like you won't necessarily need this much overkill, but take a look at your code and try to figure out the bottleneck. It seems to be neither the reading nor the parsing.
Have you measured that the speed problem is really caused by Convert.ToSingle?
In the code you included, I see you create lists and dictionaries like this:
normals = new List<Vector3>();
texCoords = new List<Vector2>();
objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>();
And then when you read the file, you add in the collection one item at a time.
One of the possible optimizations would be to save total number of normals, texCoords, indexes and everything at the start of the file, and then initialize these collections by these numbers. This will pre-allocate the buffers used by collections, so adding items to the them will be pretty fast.
So the collection creation should look like this:
// These values should be stored at the beginning of the file
int totalNormals = Convert.ToInt32(textReader.ReadLine());
int totalTexCoords = Convert.ToInt32(textReader.ReadLine());
int totalIndexes = Convert.ToInt32(textReader.ReadLine());
normals = new List<Vector3>(totalNormals);
texCoords = new List<Vector2>(totalTexCoords);
objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>(totalIndexes);
See List<T> Constructor (Int32) and Dictionary<TKey, TValue> Constructor (Int32).
This related question is for C++, but is definitely worth a read.
For reading as fast as possible, you're probably going to want to map the file into memory and then parse using some custom floating point parser, especially if you know the numbers are always in a specific format (i.e. you're the one generating the input files in the first place).
I tested .Net string parsing once and the fastest function to parse text was the old VB Val() function. You could pull the relevant parts out of Microsoft.VisualBasic.Conversion Val(string)
Converting String to numbers
Comparison of relative test times (ms / 100000 conversions)
Double Single Integer Int(w/ decimal point)
14 13 6 16 Val(Str)
14 14 6 16 Cxx(Val(Str)) e.g., CSng(Val(str))
22 21 17 e! Convert.To(str)
23 21 16 e! XX.Parse(str) e.g. Single.Parse()
30 31 31 32 Cxx(str)
Val: fastest, part of VisualBasic dll, skips non-numeric,
ConvertTo and Parse: slower, part of core, exception on bad format (including decimal point)
Cxx: slowest (for strings), part of core, consistent times across formats