My name's Lucas and I'm learning about WPF/C#.
I would like to join several images in a single file, as well as a game that use multiple textures that are all in one file, but I have no idea how to do. Could anyone help me at least to think how to do?
//Convert Image to Byte[]
public byte[] getByteFromImage()
{
byte[] imageArray = File.ReadAllBytes(op.FileName);
return imageArray;
}
//Convert Byte[] to Image
public void getImageFromByte()
{
FileStream f = new FileStream("escudos.bcf", FileMode.Open);
BinaryReader b = new BinaryReader(f);
Int64 c = f.Length+1;
MemoryStream ms = new MemoryStream(b.ReadBytes(int.Parse(c.ToString())));
Image image = new Image();
image.Source = BitmapFrame.Create(ms, BitmapCreateOptions.None,
BitmapCacheOption.OnLoad);
imgPatch2.Source = image.Source;
f.Dispose();
}
//Create Binary File
public void save(byte[] img)
{
FileStream f;
if (!File.Exists("escudos.bcf"))
{
f = new FileStream("escudos.bcf", FileMode.Create);
}
else
{
f = new FileStream("escudos.bcf", FileMode.Append);
}
BinaryWriter b = new BinaryWriter(f);
b.Write(img);
b.Close();
f.Dispose();
}
I thought in doing so, create a file and store it in binary images.
Until I got that part, but as this file will have multiple images in binary, I do not know how to pick just one binary image.
public void xmlCreate(string name, Int64 ini, Int64 fin)
{
if (!File.Exists("Escudos.xml"))
{
XmlTextWriter w = new XmlTextWriter("Escudos.xml", System.Text.Encoding.UTF8);
w.Formatting = Formatting.Indented;
w.WriteStartDocument();
w.WriteStartElement("Time");
w.WriteStartElement(name);
w.WriteElementString("Inicio", ini.ToString());
w.WriteElementString("Fim", fin.ToString());
w.WriteEndElement();
w.WriteEndDocument();
w.Close();
}
else
{
XDocument doc = XDocument.Load("Escudos.xml");
doc.Root.Add(new XElement(name));
doc.Root.Element(name).Add(new XElement("Inicio", ini.ToString()));
doc.Root.Element(name).Add(new XElement("Fim", fin.ToString()));
doc.Save("Escudos.xml");
}
}
Now I have created an xml file to store the start and end of the bytes. I can add only when I create a new xml file, I can not get a xml created and add new bytes. When I go to load the xml file gives an error message.
" An unhandled exception of type 'System.Xml.XmlException' occurred in System.Xml.dll
Additional information: '>' is an unexpected token. The expected token is '='. Line 3, position 15. "
UPDATE
when I'm reading the bytes to form an image, always the same way, even I adding different images. I'll add code below
//Add Image
private void btAddImage_Click(object sender, RoutedEventArgs e)
{
OpenFileDialog op = new OpenFileDialog();
op.Title = "Selecione a Imagem";
op.Filter = "All supported graphics|*.jpg;*.jpeg;*.png|" +
"JPEG (*.jpg;*.jpeg)|*.jpg;*.jpeg|" +
"Portable Network Graphic (*.png)|*.png";
if (op.ShowDialog() == true)
{
imgPatch.Source = new BitmapImage(new Uri(op.FileName));
txtName.Focus();
}
}
//Convert Image
private void btConvertImage_Click(object sender, RoutedEventArgs e)
{
if (String.IsNullOrEmpty(txtName.Text))
{
txtName.Focus();
MessageBox.Show("Preencha o Nome", "Error");
}
else
{
save(ConvertFileToByteArray(op.FileName), txtName.Text);
}
}
//Image to Byte Array
private static byte[] ConvertFileToByteArray(String FilePath)
{
return File.ReadAllBytes(FilePath);
}
//Save Binary File and XML File
public void save(byte[] img, string nome)
{
FileStream f;
long ini, fin = img.Length;
if (!File.Exists("Escudos.bcf"))
{
f = new FileStream("Escudos.bcf", FileMode.Create);
ini = 0;
}
else
{
f = new FileStream("Escudos.bcf", FileMode.Append);
ini = f.Length + 1;
bin = new TestBinarySegment();
}
bin.LoadAddSave("Escudos.xml", "Brasileiro", nome, ini, fin);
BinaryWriter b = new BinaryWriter(f);
b.Write(img);
b.Close();
f.Dispose();
}
//Load Image from Byte
private void btLoad_Click(object sender, RoutedEventArgs e)
{
getImageFromByte();
}
//Byte to Image
public void getImageFromByte(int start, int length)
{
using (FileStream fs = new FileStream("Escudos.bcf", FileMode.Open))
{
byte[] iba = new byte[fs.Length+1];
fs.Read(iba, start, length);
Image image = new Image();
image.Source = BitmapFrame.Create(fs, BitmapCreateOptions.None,
BitmapCacheOption.OnLoad);
imgPatch2.Source = image.Source;
}
}
Thanks
You have a binary file where its contents are segments and each segment contains the binary information needed to create an image. You need to store the starting index and length of each segment so that you can retrieve it. One way to do this is with an xml file.
To begin, create a container class for the segments. It looks like this...
public class BinarySegment
{
private const string FileName = "SegmentData.xml";
private static XmlSerializer serializer = new XmlSerializer(typeof(List<BinarySegment>));
public string SegmentName { get; set; }
public long SegmentStartIndex { get; set; }
public long SegmentLength { get; set; }
public static List<BinarySegment> LoadFromFile()
{
if (!File.Exists(FileName))
{
throw new Exception("File must be created first");
}
try
{
using (StreamReader sr = new StreamReader(FileName))
{
return serializer.Deserialize(sr) as List<BinarySegment>;
}
}
catch
{
throw new Exception("File as become corrupted");
}
}
public static void Save(List<BinarySegment> list)
{
try
{
using (StreamWriter sw = new StreamWriter(FileName))
{
serializer.Serialize(sw, list);
}
}
catch
{
throw;
}
}
}
There is one instance of this class for each image in your binary file. It will read/write a List of BinarySegments.
To test the class, create a test class like this...
public class TestBinarySegment
{
public TestBinarySegment()
{
List<BinarySegment> myBinarySegments = new List<BinarySegment>();
myBinarySegments.Add(new BinarySegment{SegmentName = "Segment1", SegmentStartIndex = 0, SegmentLength = 1111});
myBinarySegments.Add(new BinarySegment { SegmentName = "Segment2", SegmentStartIndex = 1111, SegmentLength = 1111 });
myBinarySegments.Add(new BinarySegment { SegmentName = "Segment3", SegmentStartIndex = 2222, SegmentLength = 1111 });
BinarySegment.Save(myBinarySegments);
}
public void LoadAddSave()
{
List<BinarySegment> myBinarySegments = BinarySegment.LoadFromFile();
myBinarySegments.Add(new BinarySegment { SegmentName = "Segment4", SegmentStartIndex = 333330, SegmentLength = 1111 });
BinarySegment.Save(myBinarySegments);
}
}
This class shows how to create the list and to save it. It also shows how to add new segments and resave the xml file.
When the test is run, you get a file like this...
<?xml version="1.0" encoding="utf-8"?>
<ArrayOfBinarySegment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<BinarySegment>
<SegmentName>Segment1</SegmentName>
<SegmentStartIndex>0</SegmentStartIndex>
<SegmentLength>1111</SegmentLength>
</BinarySegment>
<BinarySegment>
<SegmentName>Segment2</SegmentName>
<SegmentStartIndex>1111</SegmentStartIndex>
<SegmentLength>1111</SegmentLength>
</BinarySegment>
<BinarySegment>
<SegmentName>Segment3</SegmentName>
<SegmentStartIndex>2222</SegmentStartIndex>
<SegmentLength>1111</SegmentLength>
</BinarySegment>
<BinarySegment>
<SegmentName>Segment4</SegmentName>
<SegmentStartIndex>333330</SegmentStartIndex>
<SegmentLength>1111</SegmentLength>
</BinarySegment>
</ArrayOfBinarySegment>
To run the test, use code like this...
TestBinarySegment test = new TestBinarySegment();
test.LoadAddSave();
This shows how to use xml serialization to create a file and add new segments to it. You will need to test this concept and then integrate it into your project.
Related
I am using iTextSharp c# to extract images and its name from catalog pdf. I Am able to extract images from pdf, but struggling with extracting its corresponding image name as per the attached screenshot and save the file with that name. Please find the code below and let me know your suggestions.
Sample PDF: https://docdro.id/PwBsNR9
Code:
private static List<System.Drawing.Image> ExtractImages(String PDFSourcePath)
{
List<System.Drawing.Image> ImgList = new List<System.Drawing.Image>();
iTextSharp.text.pdf.RandomAccessFileOrArray RAFObj = null;
iTextSharp.text.pdf.PdfReader PDFReaderObj = null;
iTextSharp.text.pdf.PdfObject PDFObj = null;
iTextSharp.text.pdf.PdfStream PDFStremObj = null;
try
{
RAFObj = new iTextSharp.text.pdf.RandomAccessFileOrArray(PDFSourcePath);
PDFReaderObj = new iTextSharp.text.pdf.PdfReader(RAFObj, null);
for (int i = 0; i <= PDFReaderObj.XrefSize - 1; i++)
{
PDFObj = PDFReaderObj.GetPdfObject(i);
if ((PDFObj != null) && PDFObj.IsStream())
{
PDFStremObj = (iTextSharp.text.pdf.PdfStream)PDFObj;
iTextSharp.text.pdf.PdfObject subtype = PDFStremObj.Get(iTextSharp.text.pdf.PdfName.SUBTYPE);
if ((subtype != null) && subtype.ToString() == iTextSharp.text.pdf.PdfName.IMAGE.ToString())
{
}
if ((subtype != null) && subtype.ToString() == iTextSharp.text.pdf.PdfName.IMAGE.ToString())
{
try
{
iTextSharp.text.pdf.parser.PdfImageObject PdfImageObj =
new iTextSharp.text.pdf.parser.PdfImageObject((iTextSharp.text.pdf.PRStream)PDFStremObj);
System.Drawing.Image ImgPDF = PdfImageObj.GetDrawingImage();
ImgList.Add(ImgPDF);
}
catch (Exception)
{
}
}
}
}
PDFReaderObj.Close();
}
catch (Exception ex)
{
throw new Exception(ex.Message);
}
return ImgList;
}
Unfortunately the example PDF is not tagged. Thus, one has to otherwise try and associate title text and image, either by analyzing the location in respect to each other or by exploiting a pattern in the content streams.
In the case at hand analyzing the location in respect to each other is feasible as the title always is (at least partially) drawn on the matching image or is the text right beneath it. Thus, one could in a first pass extract the text with position from a page and in a second one the images, at the same time looking for a title in the previously extracted text in the image area or right beneath. Alternatively one could first extract images with position and size and then extract the text in these areas.
But there also is a certain pattern in the content streams: The titel is always drawn in a single text drawing instruction right after the corresponding image is drawn. Thus, one can also go ahead and in one pass extract images and the next drawn text as associated title.
Either approach can be implemented using the iText parser API. For example in case of the latter approach as follows: first, one implements a render listener that behaves as described, i.e. saves images and the following text:
internal class ImageWithTitleRenderListener : IRenderListener
{
int imageNumber = 0;
String format;
bool expectingTitle = false;
public ImageWithTitleRenderListener(String format)
{
this.format = format;
}
public void BeginTextBlock()
{ }
public void EndTextBlock()
{ }
public void RenderText(TextRenderInfo renderInfo)
{
if (expectingTitle)
{
expectingTitle = false;
File.WriteAllText(string.Format(format, imageNumber, "txt"), renderInfo.GetText());
}
}
public void RenderImage(ImageRenderInfo renderInfo)
{
imageNumber++;
expectingTitle = true;
PdfImageObject imageObject = renderInfo.GetImage();
if (imageObject == null)
{
Console.WriteLine("Image {0} could not be read.", imageNumber);
}
else
{
File.WriteAllBytes(string.Format(format, imageNumber, imageObject.GetFileType()), imageObject.GetImageAsBytes());
}
}
}
Then one parses the document pages using that render listener:
using (PdfReader reader = new PdfReader(#"EVERMOTION ARCHMODELS VOL.78.pdf"))
{
PdfReaderContentParser parser = new PdfReaderContentParser(reader);
ImageWithTitleRenderListener listener = new ImageWithTitleRenderListener(#"EVERMOTION ARCHMODELS VOL.78-{0:D3}.{1}");
for (var i = 1; i <= reader.NumberOfPages; i++)
{
parser.ProcessContent(i, listener);
}
}
I hope this would help.
I am doing this type of thing but if this would help.
// existing pdf path
PdfReader reader = new PdfReader(path);
PRStream pst;
PdfImageObject pio;
PdfObject po;
// number of objects in pdf document
int n = reader.XrefSize;
//FileStream fs = null;
// set image file location
//String path = "E:/";
for (int i = 0; i < n; i++)
{
// get the object at the index i in the objects collection
po = reader.GetPdfObject(i);
// object not found so continue
if (po == null || !po.IsStream())
continue;
//cast object to stream
pst = (PRStream)po;
//get the object type
PdfObject type = pst.Get(PdfName.SUBTYPE);
//check if the object is the image type object
if (type != null && type.ToString().Equals(PdfName.IMAGE.ToString()))
{
//get the image
pio = new PdfImageObject(pst);
// fs = new FileStream(path + "image" + i + ".jpg", FileMode.Create);
//read bytes of image in to an array
byte[] imgdata = pio.GetImageAsBytes();
try
{
Stream stream = new MemoryStream(imgdata);
FileStream fs = stream as FileStream;
if (fs != null) Console.WriteLine(fs.Name);
}
catch
{
}
}
}
Now you can save your stream.
public void SaveStreamToFile(string fileFullPath, Stream stream)
{
if (stream.Length == 0) return;
// Create a FileStream object to write a stream to a file
using (FileStream fileStream = System.IO.File.Create(fileFullPath, (int)stream.Length))
{
// Fill the bytes[] array with the stream data
byte[] bytesInStream = new byte[stream.Length];
stream.Read(bytesInStream, 0, (int)bytesInStream.Length);
// Use FileStream object to write to the specified file
fileStream.Write(bytesInStream, 0, bytesInStream.Length);
}
}
I have a TCP application where I can request images from a folder on the server from the client. If I request a small folder, it works fine. If its a big folder it will throw an out of memory exception. But then anything after that, even a folder with 1 file will throw the same out of memory exception.
I thought it might have been the thread that is out of memory, so I tried to put it on a separate thread and task but neither worked. Here is the code I'm using:
public static void Images(string path)
{
new Task(() =>
{
try
{
string root = lookupDirectoryPath("Application data");
string backupPath = root + #"\Apple Computer\MobileSync\";
string imagePath = backupPath + path;
if (Directory.Exists(imagePath))
{
String[] allfiles = Directory.GetFiles(imagePath, "*.*", SearchOption.AllDirectories);
List<Image> allImages = new List<Image>();
foreach (string file in allfiles)
{
using (FileStream stream = new FileStream(file, FileMode.Open, FileAccess.Read))
{
if (IsImage(stream))
{
allImages.Add(Image.FromFile(file));
}
}
}
if (allImages.Count > 0)
{
byte[] data = imageListToByteArray(allImages);
serverSendByteArray(data, 12);
}
else
{
serverSendByteArray(Encoding.Default.GetBytes("backup contained no images"), 1);
}
}
else
{
serverSendByteArray(Encoding.Default.GetBytes("iphone backup folder does not exist"), 1);
}
}
catch (Exception ex)
{
if (ex.GetType().IsAssignableFrom(typeof(OutOfMemoryException)))
{
serverSendByteArray(Encoding.Default.GetBytes("Out of memory, could not send iphone images"), 1);
}
else
{
serverSendByteArray(Encoding.Default.GetBytes("Unknown error, could not send iphone images"), 1);
}
}
}).Start();
}
The exception gets thrown at allImages.Add(Image.FromFile(file));
this is the isImage() function:
public static bool IsImage(Stream stream)
{
stream.Seek(0, SeekOrigin.Begin);
List<string> jpg = new List<string> { "FF", "D8" };
List<string> bmp = new List<string> { "42", "4D" };
List<string> gif = new List<string> { "47", "49", "46" };
List<string> png = new List<string> { "89", "50", "4E", "47", "0D", "0A", "1A", "0A" };
List<List<string>> imgTypes = new List<List<string>> { jpg, bmp, gif, png };
List<string> bytesIterated = new List<string>();
for (int i = 0; i < 8; i++)
{
string bit = stream.ReadByte().ToString("X2");
bytesIterated.Add(bit);
bool isImage = imgTypes.Any(img => !img.Except(bytesIterated).Any());
if (isImage)
{
return true;
}
}
return false;
}
Thanks for any help
I tried and I can reproduce your problem. It is definitely out of memory and nothing like "It just seems to be" the memory usage increases to about 4 GB and then the error shows up. Console output is just to see what's happening there.
The Image object seems to be not the best way to save the data.
I tried this and got this to work with many many files. Maybe you can change the code to fit your needs:
String[] allfiles = Directory.GetFiles(imagePath, "*.*", SearchOption.AllDirectories);
//List<Image> allImages = new List<Image>();
List<Byte[]> allImagesBytes = new List<Byte[]>();
foreach (string file in allfiles)
{
using (FileStream stream = new FileStream(file, FileMode.Open, FileAccess.Read))
{
if (IsImage(stream))
{
Console.Clear();
Console.Write(allImagesBytes.Count());
//allImages.Add(Image.FromStream(stream));
//allImages.Add(Image.FromFile(file));
allImagesBytes.Add(File.ReadAllBytes(file));
}
}
}
Image.FromFile seem to cause the error. In the following question it was a corrupeted image file or running out of file handles, Image.FromStream() did it better. Worth a try cause you already have the stream open:
https://stackoverflow.com/a/2216338/7803013
Try changing this
using (FileStream stream = new FileStream(file, FileMode.Open, FileAccess.Read))
{
if (IsImage(stream))
{
allImages.Add(Image.FromFile(file));
}
}
...
if (allImages.Count > 0)
{
byte[] data = imageListToByteArray(allImages);
serverSendByteArray(data, 12);
}
into this:
using (FileStream stream = new FileStream(file, FileMode.Open, FileAccess.Read))
{
if (IsImage(stream))
{
allImages.Add(Image.FromFile(file));
}
stream.Close();
}
....
if (allImages.Count > 0)
{
byte[] data = imageListToByteArray(allImages);
foreach(Image img in allImages)
{
img.Dispose();
}
serverSendByteArray(data, 12);
}
I am working on a program which converts the inkcanvas strokes to a byte array for encryption and then saves it in a txt file. Essentially I need to convert a byte array to inkcanvas strokes. I have the first half of the code done (which converts the inkcanvas strokes to a byte array):
private byte[] InkCanvasToByte()
{
using (MemoryStream ms = new MemoryStream())
{
if(myInkCanvas.Strokes.Count > 0)
{
myInkCanvas.Strokes.Save(ms, true);
byte[] unencryptedSignature = ms.ToArray();
return unencryptedSignature;
}
else
{
return null;
}
}
}
But I need help writing a method to convert the byte array into inkcanvas strokes in order to convert the inkcanvas strokes to a jpg.
So far I have created a method which opens the byte array file and writes it to a byte array variable:
private void ReadByteArrayFromFile()
{
string Chosen_File = "";
Microsoft.Win32.OpenFileDialog ofd = new Microsoft.Win32.OpenFileDialog();
ofd.Filter = "All Files (*.*)|*.*";
ofd.FilterIndex = 1;
ofd.Multiselect = false;
bool? userClickedOK = ofd.ShowDialog();
if (userClickedOK == true)
{
Chosen_File = ofd.FileName;
}
byte[] bytesFromFile = File.ReadAllBytes(Chosen_File);
}
Now all I need to do is convert that byte array back into an image, either through inkcanvas strokes. I'll update this post with a solution if I find one!
EDIT: Hmm. I'm using the code from that link and I get: "The input stream is not a valid binary format. The Starting contents (in byes) are: 00-FB-03-03-06-48-11-45-35-46-35-11-00-00-80-3F-1F ..."
The code I'm using is:
private void ReadByteArrayFromFile(string Chosen_File)
{
byte[] bytesFromFile = File.ReadAllBytes(Chosen_File);
try
{
BinaryFormatter bf = new BinaryFormatter();
MemoryStream ms = new MemoryStream(bytesFromFile);
MyCustomStrokes customStrokes = bf.Deserialize(ms) as MyCustomStrokes;
for(int i = 0; i < customStrokes.StrokeCollection.Length; i++)
{
if(customStrokes.StrokeCollection[i] != null)
{
StylusPointCollection stylusCollection = new
StylusPointCollection(customStrokes.StrokeCollection[i]);
Stroke stroke = new Stroke(stylusCollection);
StrokeCollection strokes = new StrokeCollection();
strokes.Add(stroke);
this.MyInkPresenter.Strokes.Add(strokes);
}
}
}
catch (Exception ex)
{
System.Windows.MessageBox.Show(ex.Message);
}
}
private void DecryptByteArray(byte[] encryptedArray)
{
}
}
[Serializable]
public sealed class MyCustomStrokes
{
public MyCustomStrokes() { }
/// <SUMMARY>
/// The first index is for the stroke no.
/// The second index is for the keep the 2D point of the Stroke.
/// </SUMMARY>
public Point[][] StrokeCollection;
}
}
My problem was that I didn't serialize the output to the saved file and thus the when I loaded that file deserializing it tripped an error. Here is the correct code:
private void SaveByteArrayToFile(byte[] byteArray)
{
var dialog = new System.Windows.Forms.FolderBrowserDialog();
string filepath = "";
if (dialog.ShowDialog() == System.Windows.Forms.DialogResult.OK)
{
filepath += dialog.SelectedPath;
System.Windows.MessageBox.Show(filepath);
}
filepath += "Signature.txt";
MyCustomStrokes customStrokes = new MyCustomStrokes();
customStrokes.StrokeCollection = new Point[myInkCanvas.Strokes.Count][];
for (int i = 0; i < myInkCanvas.Strokes.Count; i++)
{
customStrokes.StrokeCollection[i] =
new Point[this.myInkCanvas.Strokes[i].StylusPoints.Count];
for (int j = 0; j < myInkCanvas.Strokes[i].StylusPoints.Count; j++)
{
customStrokes.StrokeCollection[i][j] = new Point();
customStrokes.StrokeCollection[i][j].X =
myInkCanvas.Strokes[i].StylusPoints[j].X;
customStrokes.StrokeCollection[i][j].Y =
myInkCanvas.Strokes[i].StylusPoints[j].Y;
}
}
MemoryStream ms = new MemoryStream();
BinaryFormatter bf = new BinaryFormatter();
bf.Serialize(ms, customStrokes);
File.WriteAllBytes(filepath, Encrypt(ms.GetBuffer()));
}
private void ReadByteArrayFromFile(string Chosen_File)
{
byte[] bytesFromFile = File.ReadAllBytes(Chosen_File);
byte[] decryptedBytes = Decrypt(bytesFromFile);
try
{
BinaryFormatter bf = new BinaryFormatter();
MemoryStream ms = new MemoryStream(decryptedBytes);
MyCustomStrokes customStrokes = bf.Deserialize(ms) as MyCustomStrokes;
for(int i = 0; i < customStrokes.StrokeCollection.Length; i++)
{
if(customStrokes.StrokeCollection[i] != null)
{
StylusPointCollection stylusCollection = new
StylusPointCollection(customStrokes.StrokeCollection[i]);
Stroke stroke = new Stroke(stylusCollection);
StrokeCollection strokes = new StrokeCollection();
strokes.Add(stroke);
this.MyInkPresenter.Strokes.Add(strokes);
}
}
}
catch (Exception ex)
{
System.Windows.MessageBox.Show(ex.Message);
}
}
[Serializable]
public sealed class MyCustomStrokes
{
public MyCustomStrokes() { }
/// <SUMMARY>
/// The first index is for the stroke no.
/// The second index is for the keep the 2D point of the Stroke.
/// </SUMMARY>
public Point[][] StrokeCollection;
}
I am trying to extract all the images from a pdf using itextsharp but can't seem to overcome this one hurdle.
The error occures on the line System.Drawing.Image ImgPDF = System.Drawing.Image.FromStream(MS); giving an error of "Parameter is not valid".
I think it works when the image is a bitmap but not of any other format.
I have this following code - sorry for the length;
private void Form1_Load(object sender, EventArgs e)
{
FileStream fs = File.OpenRead(#"reader.pdf");
byte[] data = new byte[fs.Length];
fs.Read(data, 0, (int)fs.Length);
List<System.Drawing.Image> ImgList = new List<System.Drawing.Image>();
iTextSharp.text.pdf.RandomAccessFileOrArray RAFObj = null;
iTextSharp.text.pdf.PdfReader PDFReaderObj = null;
iTextSharp.text.pdf.PdfObject PDFObj = null;
iTextSharp.text.pdf.PdfStream PDFStremObj = null;
try
{
RAFObj = new iTextSharp.text.pdf.RandomAccessFileOrArray(data);
PDFReaderObj = new iTextSharp.text.pdf.PdfReader(RAFObj, null);
for (int i = 0; i <= PDFReaderObj.XrefSize - 1; i++)
{
PDFObj = PDFReaderObj.GetPdfObject(i);
if ((PDFObj != null) && PDFObj.IsStream())
{
PDFStremObj = (iTextSharp.text.pdf.PdfStream)PDFObj;
iTextSharp.text.pdf.PdfObject subtype = PDFStremObj.Get(iTextSharp.text.pdf.PdfName.SUBTYPE);
if ((subtype != null) && subtype.ToString() == iTextSharp.text.pdf.PdfName.IMAGE.ToString())
{
byte[] bytes = iTextSharp.text.pdf.PdfReader.GetStreamBytesRaw((iTextSharp.text.pdf.PRStream)PDFStremObj);
if ((bytes != null))
{
try
{
System.IO.MemoryStream MS = new System.IO.MemoryStream(bytes);
MS.Position = 0;
System.Drawing.Image ImgPDF = System.Drawing.Image.FromStream(MS);
ImgList.Add(ImgPDF);
}
catch (Exception)
{
}
}
}
}
}
PDFReaderObj.Close();
}
catch (Exception ex)
{
throw new Exception(ex.Message);
}
} //Form1_Load
Resolved...
Even I got the same exception of "Parameter is not valid" and after so much of
work with the help of the link provided by der_chirurg
(http://kuujinbo.info/iTextSharp/CCITTFaxDecodeExtract.aspx ) I resolved it
and following is the code:
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using iTextSharp.text.pdf.parser;
using Dotnet = System.Drawing.Image;
using iTextSharp.text.pdf;
namespace PDF_Parsing
{
partial class PDF_ImgExtraction
{
string imgPath;
private void ExtractImage(string pdfFile)
{
PdfReader pdfReader = new PdfReader(files[fileIndex]);
for (int pageNumber = 1; pageNumber <= pdfReader.NumberOfPages; pageNumber++)
{
PdfReader pdf = new PdfReader(pdfFile);
PdfDictionary pg = pdf.GetPageN(pageNumber);
PdfDictionary res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
PdfDictionary xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
foreach (PdfName name in xobj.Keys)
{
PdfObject obj = xobj.Get(name);
if (obj.IsIndirect())
{
PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
string width = tg.Get(PdfName.WIDTH).ToString();
string height = tg.Get(PdfName.HEIGHT).ToString();
ImageRenderInfo imgRI = ImageRenderInfo.CreateForXObject(new Matrix(float.Parse(width), float.Parse(height)), (PRIndirectReference)obj, tg);
RenderImage(imgRI);
}
}
}
}
private void RenderImage(ImageRenderInfo renderInfo)
{
PdfImageObject image = renderInfo.GetImage();
using (Dotnet dotnetImg = image.GetDrawingImage())
{
if (dotnetImg != null)
{
using (MemoryStream ms = new MemoryStream())
{
dotnetImg.Save(ms, ImageFormat.Tiff);
Bitmap d = new Bitmap(dotnetImg);
d.Save(imgPath);
}
}
}
}
}
}
You need to check the stream's /Filter to see what image format a given image uses. It may be a standard image format:
DCTDecode (jpeg)
JPXDecode (jpeg 2000)
JBIG2Decode (jbig is a B&W only format)
CCITTFaxDecode (fax format, PDF supports group 3 and 4)
Other than that, you'll need to get the raw bytes (as you are), and build an image using the image stream's width, height, bits per component, number of color components (could be CMYK, indexed, RGB, or Something Weird), and a few others, as defined in section 8.9 of the ISO PDF SPECIFICATION (available for free).
So in some cases your code will work, but in others, it'll fail with the exception you mentioned.
PS: When you have an exception, PLEASE include the stack trace every single time. Pretty please with sugar on top?
Works for me like this, using these two methods:
public static List<System.Drawing.Image> ExtractImagesFromPDF(byte[] bytes)
{
var imgs = new List<System.Drawing.Image>();
var pdf = new PdfReader(bytes);
try
{
for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
{
PdfDictionary pg = pdf.GetPageN(pageNumber);
List<PdfObject> objs = FindImageInPDFDictionary(pg);
foreach (var obj in objs)
{
if (obj != null)
{
int XrefIndex = Convert.ToInt32(((PRIndirectReference)obj).Number.ToString(System.Globalization.CultureInfo.InvariantCulture));
PdfObject pdfObj = pdf.GetPdfObject(XrefIndex);
PdfStream pdfStrem = (PdfStream)pdfObj;
var pdfImage = new PdfImageObject((PRStream)pdfStrem);
var img = pdfImage.GetDrawingImage();
imgs.Add(img);
}
}
}
}
finally
{
pdf.Close();
}
return imgs;
}
private static List<PdfObject> FindImageInPDFDictionary(PdfDictionary pg)
{
var res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
var xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
var pdfObgs = new List<PdfObject>();
if (xobj != null)
{
foreach (PdfName name in xobj.Keys)
{
PdfObject obj = xobj.Get(name);
if (obj.IsIndirect())
{
var tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
var type = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
if (PdfName.IMAGE.Equals(type)) // image at the root of the pdf
{
pdfObgs.Add(obj);
}
else if (PdfName.FORM.Equals(type)) // image inside a form
{
FindImageInPDFDictionary(tg).ForEach(o => pdfObgs.Add(o));
}
else if (PdfName.GROUP.Equals(type)) // image inside a group
{
FindImageInPDFDictionary(tg).ForEach(o => pdfObgs.Add(o));
}
}
}
}
return pdfObgs;
}
In newer version of iTextSharp, the 1st parameter of ImageRenderInfo.CreateForXObject is not Matrix anymore but GraphicsState. #der_chirurg's approach should work. I tested myself with the information from the following link and it worked beautifully:
http://www.thevalvepage.com/swmonkey/2014/11/26/extract-images-from-pdf-files-using-itextsharp/
To extract all Images on all Pages, it is not necessary to implement different filters. iTextSharp has an Image Renderer, which saves all Images in their original image type.
Just do the following found here: http://kuujinbo.info/iTextSharp/CCITTFaxDecodeExtract.aspx You don't need to implement HttpHandler...
I added library on github which, extract images in PDF and compress them.
Could be useful, when you are going to start play with very powerful library ITextSharp.
Here the link: https://github.com/rock-walker/PdfCompression
This works for me and I think it's a simple solution:
Write a custom RenderListener and implement its RenderImage method, something like this
public void RenderImage(ImageRenderInfo info)
{
PdfImageObject image = info.GetImage();
Parser.Matrix matrix = info.GetImageCTM();
var fileType = image.GetFileType();
ImageFormat format;
switch (fileType)
{//you may add more types here
case "jpg":
case "jpeg":
format = ImageFormat.Jpeg;
break;
case "pnt":
format = ImageFormat.Png;
break;
case "bmp":
format = ImageFormat.Bmp;
break;
case "tiff":
format = ImageFormat.Tiff;
break;
case "gif":
format = ImageFormat.Gif;
break;
default:
format = ImageFormat.Jpeg;
break;
}
var pic = image.GetDrawingImage();
var x = matrix[Parser.Matrix.I31];
var y = matrix[Parser.Matrix.I32];
var width = matrix[Parser.Matrix.I11];
var height = matrix[Parser.Matrix.I22];
if (x < <some value> && y < <some value>)
{
return;//ignore these images
}
pic.Save(<path and name>, format);
}
I have used this library in the past without any problems.
http://www.winnovative-software.com/PdfImgExtractor.aspx
private void btnExtractImages_Click(object sender, EventArgs e)
{
if (pdfFileTextBox.Text.Trim().Equals(String.Empty))
{
MessageBox.Show("Please choose a source PDF file", "Choose PDF file", MessageBoxButtons.OK);
return;
}
// the source pdf file
string pdfFileName = pdfFileTextBox.Text.Trim();
// start page number
int startPageNumber = int.Parse(textBoxStartPage.Text.Trim());
// end page number
// when it is 0 the extraction will continue up to the end of document
int endPageNumber = 0;
if (textBoxEndPage.Text.Trim() != String.Empty)
endPageNumber = int.Parse(textBoxEndPage.Text.Trim());
// create the PDF images extractor object
PdfImagesExtractor pdfImagesExtractor = new PdfImagesExtractor();
pdfImagesExtractor.LicenseKey = "31FAUEJHUEBQRl5AUENBXkFCXklJSUlQQA==";
// the demo output directory
string outputDirectory = Path.Combine(Application.StartupPath, #"DemoFiles\Output");
Cursor = Cursors.WaitCursor;
// set the handler to be called when an image was extracted
pdfImagesExtractor.ImageExtractedEvent += pdfImagesExtractor_ImageExtractedEvent;
try
{
// start images counting
imageIndex = 0;
// call the images extractor to raise the ImageExtractedEvent event when an images is extracted from a PDF page
// the pdfImagesExtractor_ImageExtractedEvent handler below will be executed for each extracted image
pdfImagesExtractor.ExtractImagesInEvent(pdfFileName, startPageNumber, endPageNumber);
// Alternatively you can use the ExtractImages() and ExtractImagesToFile() methods
// to extracted the images from a PDF document in memory or to image files in a directory
// uncomment the line below to extract the images to an array of ExtractedImage objects
//ExtractedImage[] pdfPageImages = pdfImagesExtractor.ExtractImages(pdfFileName, startPageNumber, endPageNumber);
// uncomment the lines below to extract the images to image files in a directory
//string outputDirectory = System.IO.Path.Combine(Application.StartupPath, #"DemoFiles\Output");
//pdfImagesExtractor.ExtractImagesToFile(pdfFileName, startPageNumber, endPageNumber, outputDirectory, "pdfimage");
}
catch (Exception ex)
{
// The extraction failed
MessageBox.Show(String.Format("An error occurred. {0}", ex.Message), "Error");
return;
}
finally
{
// uninstall the event handler
pdfImagesExtractor.ImageExtractedEvent -= pdfImagesExtractor_ImageExtractedEvent;
Cursor = Cursors.Arrow;
}
try
{
System.Diagnostics.Process.Start(outputDirectory);
}
catch (Exception ex)
{
MessageBox.Show(string.Format("Cannot open output folder. {0}", ex.Message));
return;
}
}
/// <summary>
/// The ImageExtractedEvent event handler called after an image was extracted from a PDF page.
/// The event is raised when the ExtractImagesInEvent() method is used
/// </summary>
/// <param name="args">The handler argument containing the extracted image and the PDF page number</param>
void pdfImagesExtractor_ImageExtractedEvent(ImageExtractedEventArgs args)
{
// get the image object and page number from even handler argument
Image pdfPageImageObj = args.ExtractedImage.ImageObject;
int pageNumber = args.ExtractedImage.PageNumber;
// save the extracted image to a PNG file
string outputPageImage = Path.Combine(Application.StartupPath, #"DemoFiles\Output",
"pdfimage_" + pageNumber.ToString() + "_" + imageIndex++ + ".png");
pdfPageImageObj.Save(outputPageImage, ImageFormat.Png);
args.ExtractedImage.Dispose();
}
i can upload images to the database using linq and the listview control when referancing the e.Values method for the ListViewInsertEventArgs, but there is no such method in the ListViewEditEventArgs, so what can i use to achieve the same results?
here is my inserting code:
protected void ProjectPhotosList_ItemInserting(object sender, ListViewInsertEventArgs e)
{
FileUpload uplImage = (FileUpload)ProjectPhotosList.InsertItem.FindControl("uplImage");
Label fileuploadlbl = (Label)ProjectPhotosList.InsertItem.FindControl("fileuploadlbl");
byte[] img = null;
if (uplImage.HasFile || !uplImage.FileName.ToLower().EndsWith(".jpg"))
{
try
{
img = new byte[uplImage.PostedFile.ContentLength];
uplImage.PostedFile.InputStream.Read(img, 0, img.Length);
}
catch
{
fileuploadlbl.Text = "unable to upload " + uplImage.FileName.ToString();
}
}
if (img == null)
{
e.Cancel = true;
fileuploadlbl.Text = "Please choose a file to upload";
}
try
{
e.Values.Add("ProjectPhoto", new System.Data.Linq.Binary(img));
fileuploadlbl.Text = "File Upload Successful";
}
catch
{
fileuploadlbl.Text = "File Upload Failed, please try again";
}
}
ok so i have solved the issue! I just had to go about it a bit of a different way:
this is the important code:
int mykey = int.Parse(ProjectPhotosList.DataKeys[e.ItemIndex].Value.ToString());
its just a simple way to get the primarykey value of the selected row.
I found a post about uploading pdf's to a database and decided to base the rest of my code on that. So here the full code:
protected void ProjectPhotosList_ItemUpdating(object sender, ListViewUpdateEventArgs e)
{
FileUpload myFile = (FileUpload)ProjectPhotosList.EditItem.FindControl("uploadImage");
TextBox myCaption = (TextBox)ProjectPhotosList.EditItem.FindControl("ProjectPhotoCaptionTextBox");
int mykey = int.Parse(ProjectPhotosList.DataKeys[e.ItemIndex].Value.ToString());
if (myFile.HasFile)
{
//Get the posted file
Stream fileDataStream = myFile.PostedFile.InputStream;
//Get length of file
int fileLength = myFile.PostedFile.ContentLength;
//Create a byte array with file length
byte[] fileData = new byte[fileLength];
//Read the stream into the byte array
fileDataStream.Read(fileData, 0, fileLength);
//get the file type
string fileType = myFile.PostedFile.ContentType;
//Open Connection
PHJamesDataContext db = new PHJamesDataContext();
//Find the Right Row
PHJProjectPhoto Newphoto = (from p in db.PHJProjectPhotos
where p.ProjectPhotoId == mykey
select p).Single<PHJProjectPhoto>();
Newphoto.ProjectPhoto = fileData;
db.SubmitChanges();
}