I've managed to save some information into a file. I want to be able to read that information and use it to create an item
/// <summary>
/// Writes every shield held in <c>playerInventory</c> to "PlayerInventory.txt", one item
/// per line, in the form <c>Sheild:&lt;defence&gt;,&lt;name&gt;,&lt;description&gt;,&lt;cost&gt;</c>.
/// </summary>
/// <remarks>
/// Fields are joined with commas and are not escaped, so a name or description that itself
/// contains a comma will not split back into four fields when the file is read.
/// </remarks>
public void Save()
{
    // "using" flushes and closes the writer even when a write throws; without it the
    // buffered text may never reach the file.
    using (StreamWriter writer = new StreamWriter("PlayerInventory.txt"))
    {
        for (int i = 0; i < playerInventory.Length; i++)
        {
            // The type pattern is false for null, so empty slots are skipped by the same test.
            if (playerInventory[i] is Sheild sheild)
            {
                writer.WriteLine("Sheild:" + sheild.Defence + "," + sheild.name + "," + sheild.description + "," + sheild.cost);
            }
        }
    }
}
I started using this code to load it, but I don't know how to finish it. Right now the line is split, but I don't know how to assign the parts to anything.
For example sheild:20,Shield,this is a shield,100
Right now that's split into
20,
Shield,
this is a shield,
100
But I don't know how to assign those values to an array.
/// <summary>
/// Reads "PlayerInventory.txt" line by line and splits every shield line
/// (<c>Sheild:&lt;defence&gt;,&lt;name&gt;,&lt;description&gt;,&lt;cost&gt;</c>) into its four fields.
/// </summary>
/// <remarks>
/// Reading stops quietly at the first I/O error, as before. Lines that do not start with the
/// shield prefix, or that do not split into exactly four fields, are ignored.
/// </remarks>
public void Load()
{
    const string shieldPrefix = "Sheild:";
    string[] currentLineData;

    // "using" closes the reader on every exit path, including exceptions.
    using (StreamReader reader = new StreamReader("PlayerInventory.txt"))
    {
        try
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // Match the prefix only at the start of the line so that another item whose
                // description happens to contain "Sheild" is not mistaken for a shield.
                if (!line.StartsWith(shieldPrefix, System.StringComparison.Ordinal))
                {
                    continue;
                }

                // Strings are immutable: the prefix has to be removed from the value that is
                // actually split, otherwise field 0 would still read "Sheild:20".
                currentLineData = line.Substring(shieldPrefix.Length).Split(',');
                if (currentLineData.Length != 4)
                {
                    continue; // malformed line, e.g. a comma inside the description
                }

                // currentLineData now holds: [0] defence, [1] name, [2] description, [3] cost.
                // TODO: parse [0] and [3] with int.TryParse, build the shield from the four
                // values and store it in the next free inventory slot.
            }
        }
        catch (IOException)
        {
            // Best effort: keep whatever was read before the failure.
        }
    }
}
it looks like you are designing a game.
If I were setting up an inventory that included a shield with stats, I'd look at creating a complex data type, using a "struct" for an inventory item.
Then I'd create an array of the inventory struct items, with one for each inventory slot - before loading my file in.
This could hold all the different info you need for any item that can be stored in inventory like Item Name, defense strength, attack strength etc.
Then, when you load your file in, all the right stuff will be in places with nice names etc. for you to easily find them later.
If you need an extra stat, add a row to your struct definition, then add a column to your file and you are ready to go.
Related
I add dicom files using the AddFile(dicomFile,name) method but the number of frames tag does not appear.
// Built from the loop indices i, j, k, l declared outside this snippet; not referenced again
// in the code shown here.
var sourcePath = Path.Combine(tempDirectory, "DICOM", $"PATIENT{i + 1}", $"STUDY{j + 1}", $"SERIES{k + 1}", $"SUBSERIES{l + 1}");
var dicomDir = new DicomDirectory { AutoValidate = false };

// Register every file found below tempDirectory in the DICOMDIR.
var rootDirectory = new DirectoryInfo(tempDirectory);
foreach (var candidate in rootDirectory.GetFiles("*.*", SearchOption.AllDirectories))
{
    try
    {
        var opened = DicomFile.Open(candidate.FullName);
        if (opened == null)
        {
            continue;
        }

        // Reference the file by its path relative to tempDirectory, without a leading backslash.
        var relativePath = candidate.FullName.Replace(tempDirectory, string.Empty).Trim('\\');
        dicomDir.AddFile(opened, relativePath);
    }
    catch (Exception error)
    {
        // A file that cannot be opened or added is logged and skipped.
        Log.Error(error, error.Message);
    }
}

// Write the directory file next to the data and report its location.
var dicomDirPath = Path.Combine(tempDirectory, "DICOMDIR");
dicomDir.Save(dicomDirPath);
resultDirectories.Add(dicomDirPath);
I also tried the addorupdate method but it doesn't work.
I use the fo-dicom library 4.0.7
When you build a DICOMDIR with fo-dicom by calling AddFile for each file in turn, you get a DICOMDIR containing all the required DicomTags. But of course there are a lot of optional tags, and you can add those yourself.
The method AddFile returns an instance of type DicomDirectoryEntry, which gives you a reference to the patient record entry, the study record entry, the series record entry and the instance record entry. There you can add as much additional optional data as you wish. In your case it would look like
[...]
if (dicomFile != null)
{
    var relativePath = file.FullName.Replace(tempDirectory, string.Empty).Trim('\\');
    // AddFile hands back the directory records created for this file.
    var directoryEntry = dicomDir.AddFile(dicomFile, relativePath);

    // Optional tags are not copied automatically. Copy NumberOfFrames only when the
    // source dataset actually carries it, so reading the value cannot fail on a missing tag.
    var sourceDataset = dicomFile.Dataset;
    if (sourceDataset.Contains(DicomTag.NumberOfFrames))
    {
        int frameCount = sourceDataset.GetSingleValue<int>(DicomTag.NumberOfFrames);
        directoryEntry.InstanceRecord.AddOrUpdate(DicomTag.NumberOfFrames, frameCount);
    }
}
First of all, I only found examples on google and here .json to object or .xml to object.
But I have a pre-defined code that I want to read in, edit and save exactly or only changed things.
For the context, it's about skin mods, i.e. paths to images and changing their properties
The File
Unit
{
economy : _.abcd.1234 {
player : _.defg.5678
vehicles: 10
vehicle[0] : _.hijk.9012
vehicle[1] :
... and so on
vehicle[9] : _.lmno.3456
assigned_vehicle: _.hijk.9012
...
}
... other things
vehicle : _.hijk.9012 {
license_plate: "M XX 69"
accessories: 41
accessories[0]: _.af25.1780
...
accessories[40]: _.6e68.a620
data_path: "/def/vehicle/mercedes/data.txt"
}
... other vehicles
... other accessories
accessory : _.af25.1780 {
offset: 4
paint_color: (1, 1, 1)
wear: 0
data_path: "/def/vehicle/t_wheel/single_385_55_steel.sii"
}
... other accessories
... more things with format
object : name {
property : value
property : object
property : count
property[x] : value
}
}
actually i only need the value of assigned_vehicle and then the part as an object, edited and then only write the part back.
Of course, it would be nicer to read everything in and save it completely again.
So basically my own serializer and deserializer for this format.
so what do you mean?
Read in file line by line and create and fill objects manually
or is there a better solution than creating umpteen objects and filling them by hand?
Can something like that work?
I don't know what a parser / serializer / deserializer should look like.
Or not even what my class is anyway
/// <summary>
/// Reads the save-game file at <c>saveGame.Path</c> and dispatches each top-level
/// <c>economy</c>, <c>vehicle</c> and <c>accessory</c> block to its dedicated parser.
/// </summary>
/// <param name="saveGame">Save game whose file is read and whose collections are filled.</param>
/// <remarks>
/// Each sub-parser receives the shared reader and is expected to consume the lines of its own
/// block, so this loop only ever sees block headers, braces and blank lines.
/// </remarks>
public static void ParseLocalSaveGame(SaveGame saveGame)
{
    using (StreamReader sr = new StreamReader(saveGame.Path))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            string trimmed = line.Trim();

            // Blank lines and the opening brace carry no data. The loop header already
            // advances the reader, so no extra ReadLine() is needed here; an extra one would
            // silently drop the following line.
            // (The previous test, line.Contains(""), is true for every string and therefore
            // skipped the whole file.)
            if (trimmed.Length == 0 || trimmed == "{")
            {
                continue;
            }

            // File header: the first line of the file ends with "Unit".
            if (trimmed.EndsWith("Unit", System.StringComparison.Ordinal))
            {
                continue;
            }

            string blockName;
            if (TryReadBlockName(trimmed, "economy", out blockName))
            {
                saveGame.Economy = new Economy(blockName);
                ParseSaveGameEconomy(sr);
            }
            else if (TryReadBlockName(trimmed, "vehicle", out blockName))
            {
                Vehicle vehicle = new Vehicle(blockName);
                ParseSaveGameVehicle(sr, vehicle);
                saveGame.Vehicles.Add(vehicle);
            }
            else if (TryReadBlockName(trimmed, "accessory", out blockName))
            {
                Accessory accessory = new Accessory(blockName);
                ParseSaveGameAccessory(sr, accessory);
                saveGame.Accessories.Add(accessory);
            }
            else if (trimmed == "}")
            {
                // Closing brace of the file.
            }
            else
            {
                // Unknown top-level content ("... other things" in the format description).
                // TODO: Write Error
            }
        }
    }
}

/// <summary>
/// Recognises a block header of the form <c>keyword : name {</c> and extracts the name.
/// </summary>
/// <remarks>
/// The keyword must be followed (after optional spaces) by a colon, so "vehicles: 10",
/// "vehicle[0] : ..." and "assigned_vehicle: ..." are not mistaken for a vehicle block.
/// </remarks>
private static bool TryReadBlockName(string trimmedLine, string keyword, out string name)
{
    name = null;
    if (!trimmedLine.StartsWith(keyword, System.StringComparison.Ordinal))
    {
        return false;
    }

    string rest = trimmedLine.Substring(keyword.Length).TrimStart();
    if (rest.Length == 0 || rest[0] != ':')
    {
        return false;
    }

    name = rest.Substring(1).Replace(" ", "").Replace("{", "");
    return true;
}
First Off I have a File That Looks Like This:
//Manager Ids
ManagerName: FirstName_LastName
ManagerLoginId: 12345
And a Text Box That has a five digit code(ex. 12345) That gets entered. When the Enter Key Is pressed it is assigned to a String called: "EnteredEmployeeId", Then What I need is to search the Entire file above for "EnteredEmployeeId" and if it matches then it will open another page, if it doesn't find that number then display a message(That tells you no employee Id found).
So essentially I'm trying to open a file, search the entire document for the Id, and then return true or false so that it can either display an error or open a new page, and reset EnteredEmployeeId to nothing.
My Code So Far:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace Rent_a_Car
{
    /// <summary>
    /// Login form: accepts a numeric employee ID and, on Enter, looks it up in the manager file.
    /// </summary>
    public partial class Employee_Login_Page : Form
    {
        public Employee_Login_Page()
        {
            InitializeComponent();
        }

        // Verbatim string literal (@"..."), so the backslashes in the path are not escape sequences.
        string ManagersPath = @"C:\Users\Name\Visual Studios Project Custom Files\Rent A Car Employee Id's\Managers\Manager_Ids.txt"; //Path To Manager Logins
        string EnteredEmployeeId;

        private void textBox1_TextChanged(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Rejects anything that is neither a digit nor a control character, and runs the
        /// lookup when Enter is pressed.
        /// </summary>
        private void Employee_Id_TextBox_KeyPress(object sender, KeyPressEventArgs e)
        {
            // '0' is itself a digit, so no separate test for it is needed.
            if (!char.IsControl(e.KeyChar) && !char.IsDigit(e.KeyChar))
            {
                e.Handled = true; // swallow the keystroke
            }
            else if (e.KeyChar == (char)13) // Enter (carriage return)
            {
                EnteredEmployeeId = Employee_Id_TextBox.Text;

                // IsNumberInFile is not defined in this class; it must be supplied alongside it.
                // The label is passed without the trailing colon because the lookup compares it
                // with the text in front of the ':' on each line.
                bool result = IsNumberInFile(EnteredEmployeeId, "ManagerLoginId", ManagersPath);
                if (result)
                {
                    //open new window
                }
                else
                {
                    MessageBox.Show("User Not Found");
                }
            }
        }
    }
}
This function will read through whole file and find if there is inserted code. It will work with strings (as it is output of your text box) and will return only true or false (employee is or is not in file) not his name, surname etc.
/// <summary>
/// Reports whether the file contains a line of the form <c>LineName: numberAsString</c>.
/// </summary>
/// <param name="numberAsString">The ID to look for, as entered by the user.</param>
/// <param name="LineName">Label in front of the colon, e.g. "ManagerLoginId"; a trailing ':' is tolerated.</param>
/// <param name="FileName">Path of the text file to search.</param>
/// <returns><c>true</c> when a matching line exists; otherwise <c>false</c>.</returns>
/// <remarks>Spaces are ignored on both sides of the comparison.</remarks>
static bool IsNumberInFile(string numberAsString, string LineName, string FileName)
{
    // An empty entry must never match a line whose value happens to be blank.
    if (string.IsNullOrWhiteSpace(numberAsString) || string.IsNullOrEmpty(LineName))
    {
        return false;
    }

    string wantedKey = LineName.Replace(" ", "").TrimEnd(':');
    string wantedValue = numberAsString.Replace(" ", "");

    // ReadLines streams the file, so the search can stop at the first hit without loading
    // everything into memory.
    foreach (string line in File.ReadLines(FileName))
    {
        // Remove all spaces; none are expected inside the number.
        string[] parts = line.Replace(" ", "").Split(':');

        // A line without a colon has no value part; indexing parts[1] would throw.
        if (parts.Length >= 2 && parts[0].Equals(wantedKey) && parts[1].Equals(wantedValue))
        {
            return true;
        }
    }

    return false;
}
//Example of use
// Verbatim string literal (@"..."), so the backslashes in the path are not escape sequences.
String ManagersPath = @"C:\Users\Name\Visual Studios Project Custom Files\Employee Id's\Managers\Manager_Ids.txt"; //Path To Manager Logins
String EnteredEmployeeId;

private void textBox1_TextChanged(object sender, EventArgs e)
{
}

/// <summary>
/// Rejects anything that is neither a digit nor a control character, and looks the entered
/// ID up in the manager file when Enter is pressed.
/// </summary>
private void Employee_Id_TextBox_KeyPress(object sender, KeyPressEventArgs e)
{
    // '0' is itself a digit, so no separate test for it is needed.
    if (!char.IsControl(e.KeyChar) && !char.IsDigit(e.KeyChar))
    {
        e.Handled = true; // swallow the keystroke
    }
    else if (e.KeyChar == (char)13) // Enter (carriage return)
    {
        EnteredEmployeeId = Employee_Id_TextBox.Text;
        bool result = IsNumberInFile(EnteredEmployeeId, "ManagerLoginId", ManagersPath);
        if (result)
        {
            //User is in file
        }
        else
        {
            //User is not in file
        }
    }
}
}
Short answer
Is your question about how to read your file?
/// <summary>
/// Tells whether the manager file holds a manager with the given ID.
/// </summary>
/// <param name="managerId">ID to look for.</param>
/// <returns><c>true</c> as soon as a manager with that ID is read; <c>false</c> if none is.</returns>
private bool ManagerExists(int managerId)
{
    foreach (var candidate in this.ReadManagers())
    {
        if (candidate.Id == managerId)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Lazily reads the managers stored in <c>managersFileName</c>.
/// </summary>
/// <returns>One <c>Manager</c> per "ManagerLoginId:" line, carrying the most recent "ManagerName:" value.</returns>
/// <remarks>
/// Lines are recognised by their label rather than by position, so the "//Manager Ids"
/// header, blank lines and an odd number of lines no longer break the read. An ID line whose
/// value is not an integer is skipped.
/// </remarks>
private IEnumerable<Manager> ReadManagers()
{
    const string namePrefix = "ManagerName:";
    const string idPrefix = "ManagerLoginId:";

    using (var reader = System.IO.File.OpenText(managersFileName))
    {
        string pendingName = null;
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string trimmed = line.Trim();
            if (trimmed.StartsWith(namePrefix, System.StringComparison.Ordinal))
            {
                pendingName = ExtractValue(trimmed).Trim();
            }
            else if (trimmed.StartsWith(idPrefix, System.StringComparison.Ordinal))
            {
                int managerId;
                if (Int32.TryParse(ExtractValue(trimmed).Trim(), out managerId))
                {
                    yield return new Manager
                    {
                        Id = managerId,
                        Name = pendingName,
                    };
                }

                // A name belongs to the single ID line that follows it.
                pendingName = null;
            }
        }
    }
}
/// <summary>
/// Returns the part of a "Label: value" line that follows the first space.
/// </summary>
/// <param name="text">A line such as "ManagerLoginId: 12345".</param>
/// <returns>The text after the first space, or the whole text when it contains no space.</returns>
private string ExtractValue(string text)
{
    // the value of the read text starts after the space:
    const char separator = ' ';
    int indexSeparator = text.IndexOf(separator);

    // IndexOf returns -1 when there is no space, which makes the start index 0 and
    // therefore returns the text unchanged.
    return text.Substring(indexSeparator + 1);
}
Long Answer
I see several problems in your design.
The most important thing is that you intertwine your manager handling with your form.
You should separate your concerns.
Apparently you have the notion of a sequence of Managers, each Manager has a Name (first name, last name) and a ManagerId, and in future maybe other properties.
This sequence is persistable: it is saved somewhere, and if you load it again, you have the same sequence of Managers.
In this version you want to be able to see if a Manager with a given ManagerId exists. Maybe in future you might want more functionality, like fetching information of a Manager with a certain Id, or Fetch All managers, or let's go crazy: Add / Remove / Change managers!
You see in this description I didn't mention your Forms at all. Because I separated it from your Forms, you can use it in other forms, or even in a class that has nothing to do with a Form, for instance you can use it in a unit test.
I described what I needed in such a general form that in future I might even change it. Users of my persistable manager collection wouldn't even notice it: I can put it in a JSON file, or XML; I can save the data in a Dictionary, a database, or maybe even fetch it from the internet.
All that users need to know, is that they have to create an instance of the class, using some parameters, and bingo, you can fetch Managers.
You also give users the freedom to decide how the data is to be saved: if they want to save it in a JSON file, changes in your form class will be minimal.
An object that stores sequences of objects is quite often called a Repository.
Let's create some classes:
/// <summary>
/// A manager as seen by users of the repository: a read-only ID and a name.
/// </summary>
interface IManager
{
    // Interface members are implicitly public. An explicit "public" modifier is only accepted
    // from C# 8 onwards, so it is omitted to keep this compiling on older compilers.

    /// <summary>Identifier of the manager.</summary>
    int Id { get; }

    /// <summary>Name of the manager.</summary>
    string Name { get; set; }
}
/// <summary>
/// Contract for a store of managers, independent of where the data is kept.
/// </summary>
interface IManagerRepository
{
/// <summary>Reports whether a manager with the given ID is present.</summary>
bool ManagerExists(int managerId);
// possible future extensions: Add / Retrieve / Update / Delete (CRUD)
// NOTE(review): return values and failure behaviour of the members below are not
// specified anywhere in this sketch - define them before implementing.
IManager Add(IManager manager);
IManager Find(int managerId);
void Update(IManager manager);
void Delete(int ManagerId);
}
/// <summary>
/// Plain data holder for one manager.
/// </summary>
class Manager : IManager
{
    /// <summary>Identifier of the manager (the property type was missing from the declaration).</summary>
    public int Id { get; set; }

    /// <summary>Name of the manager.</summary>
    public string Name { get; set; }
}
/// <summary>
/// Skeleton of a repository that keeps its managers in a file.
/// </summary>
/// <remarks>
/// Still a stub: the members of IManagerRepository have to be implemented before this compiles.
/// </remarks>
class ManagerFileRepository : IManagerRepository
{
    /// <param name="fileName">Path of the file that holds the managers.</param>
    public ManagerFileRepository(string fileName)
    {
        // TODO implement
    }

    // TODO: implement.
}
The ManagerFileRepository saves the managers in a file. It hides for the outside world how the file is internally structured. It could be your file format, it could be a CSV-file, or JSON / XML.
I also separated an interface, so if you later decide to save the data somewhere else, for instance in a Dictionary (for unit tests), or in a database, users of your Repository class won't see the difference.
Let's first see if you can use this class.
// Sketch of a form that delegates every manager lookup to a repository instead of reading
// the file itself. The "..." below is a placeholder for the real path of the manager file.
class MyForm : Form
{
const string managerFileName = ...
// Typed as the interface, so the storage behind it can be swapped without touching the form.
private IManagerRepository ManagerRepository {get;}
public MyForm()
{
InitializeComponent();
this.ManagerRepository = new ManagerFileRepository(managerFileName);
}
// Forwards the question to the repository.
public bool ManagerExists(int managerId)
{
return this.ManagerRepository.ManagerExists(managerId);
}
Now let's handle your keyPress:
// Key handler of the ID text box: once the entry is complete, asks the form whether a manager
// with the entered ID exists. The "..." parts are placeholders left to the reader.
private void Employee_Id_TextBox_KeyPress(object sender, KeyPressEventArgs e)
{
    TextBox textBox = (TextBox)sender;
    ... // code about numbers and enter key

    // Int32.Parse throws on an empty box or on more digits than an int can hold, so parse
    // defensively and treat unparsable input as "nothing to look up".
    int enteredManagerId;
    if (!Int32.TryParse(textBox.Text, out enteredManagerId))
    {
        return;
    }

    bool managerExists = this.ManagerExists(enteredManagerId);
    if (managerExists) { ... }
}
This code seems to do what you want in an easy way. It looks transparent. The managerRepository is testable, reusable, simple to extend or change, because users won't notice this. So the class looks good. Let's implement
Implement ManagerFileRepository
There are several ways to implement reading the file:
(1) Read everything at construction time
and keep the read data in memory. If you add Managers they are not saved until you say so. Advantages: after initial startup it is fast. You can make changes and later decide not to save them anyway, so it is just like editing any other file. Disadvantage: if your program crashes, you have lost your changes.
(2) Read the file every time you need information
Advantage: data is always up-to-date, even if others edited the file while your program runs. If you change the manager collection it is immediately saved, so other can use it.
Which solution you choose depends on the size of the file and the importance of never losing data. If your file contains millions of records, then maybe it wasn't very wise to save the data in a file. Consider SQLite to save it in a small, fairly fast database.
/// <summary>
/// Repository that reads all managers from a text file at construction time and answers
/// lookups from memory.
/// </summary>
/// <remarks>
/// Expected file content: pairs of "ManagerName: ..." and "ManagerLoginId: ..." lines; any
/// other line is ignored. The remaining IManagerRepository members (Add / Find / Update /
/// Delete) are still to be implemented.
/// </remarks>
class ManagerFileRepository : IManagerRepository, IEnumerable<IManager>
{
    private readonly IDictionary<int, IManager> managers;

    /// <param name="FileName">Path of the file that holds the managers.</param>
    public ManagerFileRepository(string FileName)
    {
        // Index the managers by ID for fast lookups. If an ID occurs twice, the last entry wins.
        this.managers = new Dictionary<int, IManager>();
        foreach (IManager manager in ReadManagers(FileName))
        {
            this.managers[manager.Id] = manager;
        }
    }

    /// <summary>Reports whether a manager with the given ID was read from the file.</summary>
    public bool ManagerExists(int managerId)
    {
        return this.managers.ContainsKey(managerId);
    }

    /// <summary>Enumerates all managers held in memory.</summary>
    public IEnumerator<IManager> GetEnumerator()
    {
        return this.managers.Values.GetEnumerator();
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return this.GetEnumerator();
    }

    /// <summary>Reads the managers from the file, one per "ManagerLoginId:" line.</summary>
    private static IEnumerable<IManager> ReadManagers(string fileName)
    {
        const string namePrefix = "ManagerName:";
        const string idPrefix = "ManagerLoginId:";

        using (var reader = System.IO.File.OpenText(fileName))
        {
            string pendingName = null;
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                string trimmed = line.Trim();
                if (trimmed.StartsWith(namePrefix, System.StringComparison.Ordinal))
                {
                    pendingName = trimmed.Substring(namePrefix.Length).Trim();
                }
                else if (trimmed.StartsWith(idPrefix, System.StringComparison.Ordinal))
                {
                    int managerId;
                    if (Int32.TryParse(trimmed.Substring(idPrefix.Length).Trim(), out managerId))
                    {
                        yield return new Manager { Id = managerId, Name = pendingName };
                    }

                    // A name belongs to the single ID line that follows it.
                    pendingName = null;
                }
            }
        }
    }
}
Room for improvement
If you will be using your manager repository for more things, consider to let the repository implement ICollection<IManager> and IReadOnlyCollection<IManager>. This is quite simple:
/// <summary>
/// Enumerates the managers currently held by the repository.
/// </summary>
/// <returns>An enumerator over the dictionary's values.</returns>
public IEnumerator<IManager> GetEnumerator()
{
    // Values.GetEnumerator() produces an IEnumerator<IManager>, not an IEnumerable<IManager>;
    // this is also the signature that IEnumerable<IManager> requires.
    return this.managers.Values.GetEnumerator();
}
/// <summary>
/// Stores the manager under its own ID.
/// </summary>
/// <param name="manager">Manager to add; its <c>Id</c> becomes the dictionary key.</param>
public void Add(IManager manager)
{
    int key = manager.Id;
    this.managers.Add(key, manager);
}
// etc.
If you add functions to change the manager collection you'll also need a Save method:
/// <summary>
/// Rewrites the file at <c>FullFileName</c> with one name line and one ID line per manager.
/// </summary>
public void Save()
{
    using (var output = File.CreateText(FullFileName))
    {
        foreach (var entry in managers.Values)
        {
            // Two lines per manager: the name first, then the login ID.
            output.WriteLine("ManagerName: " + entry.Name);
            output.WriteLine("ManagerLoginId: " + entry.Id.ToString());
        }
    }
}
Another method of improvement: your file structure. Consider using a more standard file structure: CSV, JSON, XML. There are numerous NUGET packages (CSVHelper, NewtonSoft.Json) that makes reading and writing Managers much more robust.
Summary
Because you separated the concerns of persisting your managers from your form, you can reuse the manager repository, especially if you need functionality to Add / Retrieve / Update / Delete managers.
Because of the separation it is much easier to unit test your functions. And future changes won't hinder users of the repository, because they won't notice that the data has changed.
If your Manager_Ids.txt is in the following format, you can use the StreamReader.ReadLine() method to traverse the text line by line and query it.
ManagerName: FirstName_LastName1
ManagerLoginId: 12345
ManagerName: FirstName_LastName2
ManagerLoginId: 23456
...
Here is the demo that traverses the .txt file.
// Verbatim string literal (@"..."), so the backslash in the path is not an escape sequence.
string ManagersPath = @"D:\Manager_Ids.txt";
string EnteredEmployeeId;

/// <summary>
/// On Enter, searches the manager file for the ID typed into <c>textBox_id</c> and shows the
/// matching name/ID pair, or "No such id" when there is none.
/// </summary>
private void textBox_id_KeyDown(object sender, KeyEventArgs e)
{
    if (e.KeyCode != Keys.Enter)
    {
        return;
    }

    EnteredEmployeeId = textBox_id.Text;

    const string namePrefix = "ManagerName:";
    const string idPrefix = "ManagerLoginId:";
    bool exist = false;

    // "using" closes the file on every exit path, including exceptions.
    using (System.IO.StreamReader file = new System.IO.StreamReader(ManagersPath))
    {
        string nameLine = "";
        string line;
        while ((line = file.ReadLine()) != null)
        {
            // Lines are recognised by their label, so a header comment, a blank line or a
            // line without ':' cannot shift the pairing or cause an index error.
            if (line.StartsWith(namePrefix, System.StringComparison.Ordinal))
            {
                nameLine = line;
            }
            else if (line.StartsWith(idPrefix, System.StringComparison.Ordinal))
            {
                if (line.Substring(idPrefix.Length).Trim() == EnteredEmployeeId)
                {
                    MessageBox.Show(nameLine + "\n" + line);
                    exist = true;
                    break;
                }

                nameLine = "";
            }
        }
    }

    if (!exist)
    {
        MessageBox.Show("No such id");
    }
}
Besides, I recommend to use "xml", "json" or other formats to serialize the data. About storing the data in "xml", you can refer to the following simple demo.
<?xml version="1.0"?>
<Managers>
<Manager>
<ManagerName>FirstName_LastName1</ManagerName>
<ManagerLoginId>12345</ManagerLoginId>
</Manager>
<Manager>
<ManagerName>FirstName_LastName2</ManagerName>
<ManagerLoginId>23456</ManagerLoginId>
</Manager>
</Managers>
And then use LINQ to XML to query the id.
// Verbatim string literal (@"..."), so the backslash in the path is not an escape sequence.
string ManagersPath = @"D:\Manager_Ids.xml";
string EnteredEmployeeId;

/// <summary>
/// On Enter, loads the XML file and shows every Manager element whose ManagerLoginId equals
/// the text typed into <c>textBox_id</c>, or "No such id" when there is none.
/// </summary>
private void textBox_id_KeyDown(object sender, KeyEventArgs e)
{
    if (e.KeyCode != Keys.Enter)
    {
        return;
    }

    EnteredEmployeeId = textBox_id.Text;
    XElement root = XElement.Load(ManagersPath);

    // Materialise the query once. A deferred query would be re-run by the emptiness check
    // and again by the loop below.
    var matches = root.Elements("Manager")
        .Where(el => (string)el.Element("ManagerLoginId") == EnteredEmployeeId)
        .ToList();

    if (matches.Count == 0)
    {
        MessageBox.Show("No such id");
        return;
    }

    foreach (XElement el in matches)
    {
        MessageBox.Show("ManagerName: " + (string)el.Element("ManagerName") + "\n"
            + "ManagerLoginId: " + (string)el.Element("ManagerLoginId"));
    }
}
So, I am trying to upload a certain files with names like '2018-2-10 10-23-34' // February 10, 2018 10:23:34, this is not a one file only, I have like multiple files with names like these. That's why I use HttpFileCollection.
Now, for example that I selected files with file names like these, I want to check if it has the right file name, else it will just SaveAs as it is.
As you can see below, I added a fake code, its fake since its not working or it has a wrong syntax in it.
I saw a code like this, but I don't know how to apply this on my current code with HttpFileCollection, please help.
// True when at least one entry returned for `path` contains the text "three".
// NOTE(review): the strings returned by EnumerateFiles appear to include the directory part,
// so a folder name containing "three" would match as well - confirm whether only the file
// name should be tested.
bool contains = Directory.EnumerateFiles(path).Any(f => f.Contains("three"));
My Code
/// <summary>
/// Saves every uploaded file to the desktop folder. A file whose name contains today's date
/// (format yyyy-M-d) gets the weekday, e.g. "(Saturday)", inserted before its extension;
/// every other file is saved under its own name.
/// </summary>
protected void UploadButton_Click(object sender, EventArgs e)
{
    if (!FileUpload1.HasFile)
    {
        return;
    }

    try
    {
        DateTime today = DateTime.Now;
        // Invariant culture keeps the digits and the calendar independent of server settings.
        string date = today.ToString("yyyy-M-d", CultureInfo.InvariantCulture);
        string dayoftheweek = "(" + today.ToString("dddd") + ")";
        Response.Write(dayoftheweek);

        string path = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
        HttpFileCollection hfc = Request.Files;
        for (int i = 0; i < hfc.Count; i++)
        {
            // The client controls FileName. Keep only the final name component so that a
            // value such as "..\..\x" cannot escape the target folder.
            string fileName = System.IO.Path.GetFileName(hfc[i].FileName);
            if (string.IsNullOrEmpty(fileName))
            {
                continue; // empty upload slot
            }

            // NOTE: Contains also matches longer dates ("2018-2-1" is found inside "2018-2-10").
            string targetName = fileName;
            if (fileName.Contains(date))
            {
                targetName = System.IO.Path.GetFileNameWithoutExtension(fileName)
                    + dayoftheweek
                    + System.IO.Path.GetExtension(fileName);
            }

            // Encode before echoing: the name is untrusted input.
            Response.Write(HttpUtility.HtmlEncode(fileName));
            hfc[i].SaveAs(System.IO.Path.Combine(path, targetName));
        }
    }
    catch (Exception ex)
    {
        // Report the failure instead of silently discarding it.
        Response.Write("Upload failed: " + HttpUtility.HtmlEncode(ex.Message));
    }
}
Your foreach loop is wrong as you mentioned.
// Iterate through the uploaded file collection by index and save only the files whose name
// contains the date, with the weekday appended to the name.
for (int i = 0; i < hfc.Count; i++)
{
    if (hfc[i].FileName.Contains(date))
    {
        hfc[i].SaveAs(path + "\\" + hfc[i].FileName + dayoftheweek);
    }
}
The foreach loop is not needed anymore so you can remove
foreach (hfc[i].FileName.Contains(date))
{
}
The problem is that your foreach loop doesn't have a declaration:
foreach(var variable in Enumerable){
//other code
}
so in your case it would be
// Enumerating an HttpFileCollection yields the form-field keys (strings), not the files, so
// each file has to be fetched through its key.
// NOTE: when several files share one key, the indexer returns only one of them; in that case
// use the index-based loop: for (int i = 0; i < hfc.Count; i++) { ... hfc[i] ... }.
foreach (string key in hfc)
{
    HttpPostedFile file = hfc[key];
    //other code
}
and it should work just fine
I use iText5 for .NET to extract text from a PDF, by using below code.
/// <summary>
/// Extracts the text of the first page of "Scharfetter1969.pdf" and shows it in <c>textBox1</c>.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    const string pdfPath = "Scharfetter1969.pdf";
    const int firstPage = 1; // PDF page numbers start at 1

    textBox1.Text = "";

    // Open the document once; it used to be opened twice, once only to count the pages.
    PdfReader reader = new PdfReader(pdfPath);
    try
    {
        if (reader.NumberOfPages < firstPage)
        {
            return;
        }

        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();

        // The extracted text is already a .NET string. Pushing it through Encoding.Default and
        // back only replaced every character missing from the ANSI code page with '?'.
        textBox1.Text = PdfTextExtractor.GetTextFromPage(reader, firstPage, its);
    }
    finally
    {
        // Release the file even when extraction throws.
        reader.Close();
    }
}
But I want to get bibliographic data from research paper pdf.
Here is an example of the data extracted from this PDF (in EndNote format). Here's a link!
%0 Journal Article
%T Repeated temperature modulation epitaxy for p-type doping and light-emitting diode based on ZnO
%A Tsukazaki, A.
%A Ohtomo, A.
%A Onuma, T.
%A Ohtani, M.
%A Makino, T.
%A Sumiya, M.
%A Ohtani, K.
%A Chichibu, S.F.
%A Fuke, S.
%A Segawa, Y.
%J Nature Materials
%V 4
%N 1
%P 42-46
%# 1476-1122
%D 2004
%I Nature Publishing Group
But remember that this is bibliographic information; it is not available in the metadata of this PDF. I want to access the Article Type (%0), Title (%T), Authors (%A), Date (%D) and Publisher (%I) and show each one in its own assigned text box on a Windows Form.
I am using C# if any one have any code for this, or guide me how to do this.
PDF is a one-way format. You put data in so that it renders consistently on all devices (monitors, printers, etc) but the format was never intended to pull data back out. Any and all attempts to do that will be pure guess work. iText's PdfTextExtractor works but you are going to have to piece things together based on your own arbitrary set of rules, and these rules will probably change from PDF to PDF. The supplied PDF was created by InDesign which does such a great job of making text look good that it actually makes it even harder to parse the data back out.
That said, if your PDFs are all visually consistent, you could try to pull the data out while retaining formatting and use the formatting rules to guess what is what. That post will get you some HTML formatting that you could guess at. (If this actually works I'd recommend returning something more specific than HTML but I'll leave that up to you.)
Running it against your supplied PDF shows that the title is using the font HelveticaNeue-LightExt at about 17pts so you could write a rule to look for all lines that use that font at that size and combine them together. Authors are done in HelveticaNeue-Condensed at about 10pts so that's another rule.
The code below is a modified version of the one linked to above. It's a full working C# 2010 WinForms app targeting iTextSharp 5.1.1.0. It pulls out the title and authors for the supplied PDF, but you'll need to tweak it for other PDFs and metadata. See the comments in the code for specific implementation details.
using System;
using System.Collections.Generic;
using System.Text;
using System.Windows.Forms;
using iTextSharp.text.pdf.parser;
using iTextSharp.text.pdf;
namespace WindowsFormsApplication1
{
public partial class Form1 : Form
{
// Creates the form; all the constructor does is run the designer-generated InitializeComponent().
public Form1()
{
InitializeComponent();
}
/// <summary>
/// Extracts page 1 of "nmat4-42.pdf" from the desktop as font-annotated HTML, picks the title
/// and author lines by font name and size, writes them to the console and closes the form.
/// </summary>
private void Form1_Load(object sender, EventArgs e)
{
    string F;
    PdfReader reader = new PdfReader(System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "nmat4-42.pdf"));
    try
    {
        TextWithFontExtractionStategy S = new TextWithFontExtractionStategy();
        F = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, 1, S);
    }
    finally
    {
        // Release the file handle even when extraction throws.
        reader.Close();
    }

    //Buffers to hold various parts from the PDF
    List<string> titles = new List<string>();
    List<string> authors = new List<string>();

    //Array of lines of text
    string[] lines = F.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);

    //Loop through each line in the array
    foreach (string line in lines)
    {
        //See if the line looks like a "title"
        if (line.Contains("HelveticaNeue-LightExt") && line.Contains("font-size:17.28003"))
        {
            //Remove the HTML tags
            titles.Add(System.Text.RegularExpressions.Regex.Replace(line, "</?span.*?>", "").Trim());
        }
        //See if the line looks like an "author"
        else if (line.Contains("HelveticaNeue-Condensed") && line.Contains("font-size:9.995972"))
        {
            //Remove the HTML tags and trim extra characters
            string t = System.Text.RegularExpressions.Regex.Replace(line, "</?span.*?>", "").Trim(new char[] { ' ', ',', '*' });

            //Make sure we have a valid name, probably need some more exceptions here, too
            if (!string.IsNullOrWhiteSpace(t) && t != "AND")
            {
                authors.Add(t);
            }
        }
    }

    //Write out the title to the console
    Console.WriteLine("Title : {0}", string.Join(" ", titles.ToArray()));

    //Write out each author
    foreach (string author in authors)
    {
        Console.WriteLine("Author : {0}", author);
    }

    Console.WriteLine(F);
    this.Close();
}
/// <summary>
/// Text extraction strategy that wraps each run of text in a <c>span</c> tag carrying the
/// font name and the measured font size, and inserts a <c>br</c> tag when the baseline changes.
/// </summary>
/// <remarks>
/// The extracted text is appended as-is (not HTML-encoded), so the output is meant for simple
/// pattern matching rather than for display in a browser.
/// </remarks>
public class TextWithFontExtractionStategy : iTextSharp.text.pdf.parser.ITextExtractionStrategy
{
    //HTML buffer
    private StringBuilder result = new StringBuilder();

    //Store last used properties
    private Vector lastBaseLine;
    private string lastFont;
    private float lastFontSize;

    //http://api.itextpdf.com/itext/com/itextpdf/text/pdf/parser/TextRenderInfo.html
    private enum TextRenderMode
    {
        FillText = 0,
        StrokeText = 1,
        FillThenStrokeText = 2,
        Invisible = 3,
        FillTextAndAddToPathForClipping = 4,
        StrokeTextAndAddToPathForClipping = 5,
        FillThenStrokeTextAndAddToPathForClipping = 6,
        AddTextToPathForClipping = 7
    }

    /// <summary>
    /// Called for each chunk of text; opens a new span whenever the baseline, the font or
    /// the font size differs from the previous chunk.
    /// </summary>
    public void RenderText(iTextSharp.text.pdf.parser.TextRenderInfo renderInfo)
    {
        string curFont = renderInfo.GetFont().PostscriptFontName;

        //Check if faux bold is used
        if (renderInfo.GetTextRenderMode() == (int)TextRenderMode.FillThenStrokeText)
        {
            curFont += "-Bold";
        }

        //This code assumes that if the baseline changes then we're on a newline
        Vector curBaseline = renderInfo.GetBaseline().GetStartPoint();
        Vector topRight = renderInfo.GetAscentLine().GetEndPoint();
        iTextSharp.text.Rectangle rect = new iTextSharp.text.Rectangle(curBaseline[Vector.I1], curBaseline[Vector.I2], topRight[Vector.I1], topRight[Vector.I2]);
        Single curFontSize = rect.Height;

        //See if something has changed, either the baseline, the font or the font size
        bool baselineChanged = (this.lastBaseLine != null) && (curBaseline[Vector.I2] != lastBaseLine[Vector.I2]);
        if ((this.lastBaseLine == null) || baselineChanged || (curFontSize != lastFontSize) || (curFont != lastFont))
        {
            //if we've put down at least one span tag close it
            if (this.lastBaseLine != null)
            {
                this.result.AppendLine("</span>");
            }

            //If the baseline has changed then insert a line break
            if (baselineChanged)
            {
                this.result.AppendLine("<br />");
            }

            //Create an HTML tag with appropriate styles.
            //Format with the invariant culture: callers match on text such as
            //"font-size:17.28003", which would read "17,28003" under a culture that uses a
            //decimal comma and then never match.
            this.result.AppendFormat(System.Globalization.CultureInfo.InvariantCulture, "<span style=\"font-family:{0};font-size:{1}\">", curFont, curFontSize);
        }

        //Append the current text
        this.result.Append(renderInfo.GetText());

        //Set currently used properties
        this.lastBaseLine = curBaseline;
        this.lastFontSize = curFontSize;
        this.lastFont = curFont;
    }

    /// <summary>
    /// Returns the HTML collected so far, with the last open span closed.
    /// </summary>
    public string GetResultantText()
    {
        //If we wrote anything then we'll always have a missing closing tag. Close it in the
        //returned copy only, so that calling this method again does not add a second one.
        if (result.Length > 0)
        {
            return result.ToString() + "</span>";
        }

        return result.ToString();
    }

    //Not needed
    public void BeginTextBlock() { }
    public void EndTextBlock() { }
    public void RenderImage(ImageRenderInfo renderInfo) { }
}
}
}