Introduction
Last night I was browsing the forums and saw a user ask how to extract images from a Word document and save them to a database. We can do this using the Microsoft.Office.Interop.Word library, which lets managed code access the Word COM object model from within the .NET Framework. Therefore you must have the MS Office runtime installed on your machine to implement this feature.

Technologies
.NET Framework 2.0 or later
Microsoft Office 2003 or later

Languages
C#

Prerequisites
Visual Studio 2005 or later version
Microsoft Office runtime for .NET

Implementation
The implementation has three parts to achieve this goal:
1. Read the pictures from the Word document.
2. Save them to memory using a memory stream.
3. Finally, save them to the database.
Note: The code in this article assumes there may be multiple images in the Word document.
Let us create a new project called "ExtractImageFromWRD" and then add the MS Office reference to the project so that it can access Word documents. To do this, right-click your project, select the Add Reference menu, go to the .NET tab and select the "Microsoft.Office.Interop.Word" (10/11/12) object library. Then add this using statement at the top of the class: using Microsoft.Office.Interop.Word;
Now we start to write the code for the first operation. This method should load the Word document and then read its inline shapes.
// Word automation object; created on demand and discarded after every extraction.
ApplicationClass wordApplicationClass = null;
// Placeholder passed for every optional COM argument.
object missing = System.Reflection.Missing.Value;
// Boxed "true", passed as the third (ReadOnly) argument of Documents.Open.
object yes = true;
// Boxed file path; COM interop needs it as a ref object.
object filePathObject = null;

/// <summary>
/// Opens a Word document and copies every inline shape, via the clipboard,
/// into an in-memory BMP image.
/// </summary>
/// <param name="filePath">Path of the Word document to read.</param>
/// <returns>
/// One <see cref="MemoryStream"/> per inline shape that produced a bitmap on the
/// clipboard, each rewound to position 0. The list is empty when the document
/// has no such shapes. The caller owns (and must dispose) the streams.
/// </returns>
/// <remarks>
/// This method overwrites the current clipboard contents. Clipboard access in
/// Windows Forms requires the calling thread to be in STA mode.
/// </remarks>
public List<MemoryStream> GetImageFromFile(string filePath)
{
    if (wordApplicationClass == null)
    {
        wordApplicationClass = new ApplicationClass();
    }

    List<MemoryStream> collectionImageSream = new List<MemoryStream>();
    try
    {
        filePathObject = filePath;
        Document document = wordApplicationClass.Documents.Open(
            ref filePathObject, ref missing, ref yes, ref missing,
            ref missing, ref missing, ref missing, ref missing,
            ref missing, ref missing, ref missing, ref missing,
            ref missing, ref missing, ref missing, ref missing);
        wordApplicationClass.Visible = false;

        // Word collections are 1-based.
        for (int i = 1; i <= document.InlineShapes.Count; i++)
        {
            document.InlineShapes[i].Select();
            document.ActiveWindow.Selection.CopyAsPicture();

            // GetDataObject returns null when the clipboard is empty.
            System.Windows.Forms.IDataObject data = Clipboard.GetDataObject();
            if (data != null && data.GetDataPresent(typeof(System.Drawing.Bitmap)))
            {
                using (Bitmap bmp = (Bitmap)data.GetData(typeof(System.Drawing.Bitmap)))
                {
                    MemoryStream stream = new MemoryStream();
                    bmp.Save(stream, ImageFormat.Bmp);
                    // Save leaves the position at the end; rewind so readers start at byte 0.
                    stream.Position = 0;
                    collectionImageSream.Add(stream);
                }
            }
        }
    }
    finally
    {
        // Always shut Word down, then forget the instance so the next call
        // creates a fresh one instead of reusing an application that has quit.
        try
        {
            wordApplicationClass.Quit(ref missing, ref missing, ref missing);
        }
        finally
        {
            wordApplicationClass = null;
        }
    }
    return collectionImageSream;
}
The code above is a little different from typical code. Look at this portion:
// loop through the inline shapes (Word collections are 1-based)
for (int i = 1; i <= document.InlineShapes.Count; i++)
{
    // select each inline shape (this is how the images are reached)
    document.InlineShapes[i].Select();
    // then copy the selected object to the clipboard as a picture
    document.ActiveWindow.Selection.CopyAsPicture();
    // read back what is now on the clipboard
    System.Windows.Forms.IDataObject data = Clipboard.GetDataObject();
    // continue only when the clipboard holds a Bitmap object
    if (data != null && data.GetDataPresent(typeof(System.Drawing.Bitmap)))
    {
        // the using block disposes the bitmap object when we are done with it
        using (Bitmap bmp = (Bitmap)data.GetData(typeof(System.Drawing.Bitmap)))
        {
            // create a memory stream to keep the image content in memory
            MemoryStream stream = new MemoryStream();
            // save the bitmap to memory
            bmp.Save(stream, ImageFormat.Bmp);
            // rewind, because Save leaves the position at the end of the data
            stream.Position = 0;
            // the document may hold more than one image, so collect every stream in the list
            collectionImageSream.Add(stream);
        }
    }
}
Note: In the above method I have given a line-by-line description of what I have done. Next we need to write the code that reads the image content from the stream objects and saves it to the database.
public void SaveToDatabase(List<MemoryStream> collectionOfImages)
{
    // Reads every stream completely into a byte array and stores it through
    // AddImageToDB, using the zero-based position in the list as the ID.
    // Each stream is disposed once it has been read (the BinaryReader owns it).
    //
    // NOTE(review): ArgumentNullException / ArgumentException would be the
    // idiomatic choices below; the original exception types and messages are
    // kept so that existing catch blocks keep working.
    if (collectionOfImages == null)
    {
        throw new NullReferenceException("the collection of the image stream is null");
    }
    if (collectionOfImages.Count == 0)
    {
        throw new Exception("The collection of image stream is empty");
    }

    int index = 0;
    foreach (MemoryStream stream in collectionOfImages)
    {
        // A stream that was just written to is positioned at its end, and
        // ReadBytes reads from the current position, so rewind first;
        // otherwise an empty array would be stored.
        stream.Position = 0;
        long noOfBytes = stream.Length;
        using (BinaryReader reader = new BinaryReader(stream))
        {
            byte[] imageData = reader.ReadBytes((int)noOfBytes);
            AddImageToDB(imageData, index);
            index++;
        }
    }
}
The above method simply reads the image content from each stream into a byte array and passes it to the method that adds it to the database. In this method we need to know two things:
1. We need to get the content length from the memory stream object.
2. We need to use the BinaryReader to read the content from the memory stream.
// use the BinaryReader to read byte data from the stream.
using (BinaryReader reader = new BinaryReader(stream))
{
byte[] imageData = null;
// read data and assign to bytes array.
imageData = reader.ReadBytes((int)noOfBytes);
}
The next and final step is to write the method that adds the image content to the database as binary data.
private void AddImageToDB(byte[] imageData, int id)
{
    // Inserts one image row by executing the stored procedure from the SQL
    // script in this article: spAddImages(@ImageData image, @ID int).
    // The procedure and parameter names must match that script exactly.
    // Exceptions from SqlClient propagate to the caller with their stack
    // trace intact (no catch-and-rethrow).
    using (SqlConnection connection = new SqlConnection(connectionString))
    {
        // A freshly constructed connection is always closed, so open it directly.
        connection.Open();
        using (SqlCommand cmd = connection.CreateCommand())
        {
            cmd.CommandText = "spAddImages";
            cmd.CommandType = System.Data.CommandType.StoredProcedure;
            cmd.Parameters.Add("@ID", System.Data.SqlDbType.Int).Value = id;
            cmd.Parameters.Add("@ImageData", System.Data.SqlDbType.Image).Value = imageData;
            cmd.ExecuteNonQuery();
        }
    }
}
The above method uses the usual code to save to the database through a stored procedure. Now we need to create a table in our database that stores the image using the image data type. Therefore we need to create a table using the following SQL script.

SQL Script
CREATE TABLE [dbo].[Images]([ID] [int] NULL,[ImageData] [image] NULL) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

Stored Procedures
CREATE PROCEDURE spAddImages
-- Add the parameters for the stored procedure here
@ImageData image,
@ID int
AS
BEGIN
SET NOCOUNT ON;
INSERT INTO [HouseRental].[dbo].[Images]([ID],[ImageData])
VALUES(@ID,@ImageData)
END
GO

Complete Source code
using System;
using System.Collections.Generic;
using Microsoft.Office.Interop.Word;
using System.Drawing;
using System.Windows.Forms;
using System.IO;
using System.Drawing.Imaging;
using System.Data.SqlClient;
namespace CG.CS.Utils
{
    /// <summary>
    /// Extracts the inline images of a Word document through Word automation
    /// and stores them in a SQL Server table.
    /// </summary>
    public class WordUtility
    {
        // Word automation object; created on demand and discarded after every extraction.
        ApplicationClass wordApplicationClass = null;
        // Placeholder passed for every optional COM argument.
        object missing = System.Reflection.Missing.Value;
        // Boxed "true", passed as the third (ReadOnly) argument of Documents.Open.
        object yes = true;
        // Boxed file path; COM interop needs it as a ref object.
        object filePathObject = null;
        // The original listing used this name without declaring it.
        private string connectionString;

        /// <summary>
        /// Connection string of the database that contains the Images table.
        /// Must be set before <see cref="SaveToDatabase"/> is called.
        /// </summary>
        public string ConnectionString
        {
            get { return connectionString; }
            set { connectionString = value; }
        }

        /// <summary>
        /// Opens a Word document and copies every inline shape, via the
        /// clipboard, into an in-memory BMP image.
        /// </summary>
        /// <param name="filePath">Path of the Word document to read.</param>
        /// <returns>
        /// One <see cref="MemoryStream"/> per inline shape that produced a bitmap
        /// on the clipboard, each rewound to position 0. The list is empty when
        /// the document has no such shapes. The caller owns the streams.
        /// </returns>
        /// <remarks>
        /// This method overwrites the current clipboard contents. Clipboard access
        /// in Windows Forms requires the calling thread to be in STA mode.
        /// </remarks>
        public List<MemoryStream> GetImageFromFile(string filePath)
        {
            if (wordApplicationClass == null)
            {
                wordApplicationClass = new ApplicationClass();
            }

            List<MemoryStream> collectionImageSream = new List<MemoryStream>();
            try
            {
                filePathObject = filePath;
                Document document = wordApplicationClass.Documents.Open(
                    ref filePathObject, ref missing, ref yes, ref missing,
                    ref missing, ref missing, ref missing, ref missing,
                    ref missing, ref missing, ref missing, ref missing,
                    ref missing, ref missing, ref missing, ref missing);
                wordApplicationClass.Visible = false;

                // Word collections are 1-based.
                for (int i = 1; i <= document.InlineShapes.Count; i++)
                {
                    document.InlineShapes[i].Select();
                    document.ActiveWindow.Selection.CopyAsPicture();

                    // GetDataObject returns null when the clipboard is empty.
                    System.Windows.Forms.IDataObject data = Clipboard.GetDataObject();
                    if (data != null && data.GetDataPresent(typeof(System.Drawing.Bitmap)))
                    {
                        using (Bitmap bmp = (Bitmap)data.GetData(typeof(System.Drawing.Bitmap)))
                        {
                            MemoryStream stream = new MemoryStream();
                            bmp.Save(stream, ImageFormat.Bmp);
                            // Save leaves the position at the end; rewind for readers.
                            stream.Position = 0;
                            collectionImageSream.Add(stream);
                        }
                    }
                }
            }
            finally
            {
                // Always shut Word down, then forget the instance so the next
                // call creates a fresh one instead of reusing one that has quit.
                try
                {
                    wordApplicationClass.Quit(ref missing, ref missing, ref missing);
                }
                finally
                {
                    wordApplicationClass = null;
                }
            }
            return collectionImageSream;
        }

        /// <summary>
        /// Stores every image stream in the database, using the zero-based
        /// position in the list as the row ID. Each stream is disposed after
        /// it has been read.
        /// </summary>
        /// <param name="collectionOfImages">Streams returned by <see cref="GetImageFromFile"/>.</param>
        /// <exception cref="NullReferenceException">The collection is null.</exception>
        /// <exception cref="Exception">The collection is empty.</exception>
        public void SaveToDatabase(List<MemoryStream> collectionOfImages)
        {
            // NOTE(review): ArgumentNullException / ArgumentException would be
            // idiomatic; the original types and messages are kept so existing
            // catch blocks keep working.
            if (collectionOfImages == null)
            {
                throw new NullReferenceException("the collection of the image stream is null");
            }
            if (collectionOfImages.Count == 0)
            {
                throw new Exception("The collection of image stream is empty");
            }

            int index = 0;
            foreach (MemoryStream stream in collectionOfImages)
            {
                // ReadBytes reads from the current position, so make sure we
                // start at the beginning regardless of how the stream was left.
                stream.Position = 0;
                long noOfBytes = stream.Length;
                using (BinaryReader reader = new BinaryReader(stream))
                {
                    byte[] imageData = reader.ReadBytes((int)noOfBytes);
                    AddImageToDB(imageData, index);
                    index++;
                }
            }
        }

        /// <summary>
        /// Inserts one image row by executing the spAddImages stored procedure
        /// (parameters @ID int, @ImageData image) from the article's SQL script.
        /// </summary>
        /// <param name="imageData">Raw image bytes.</param>
        /// <param name="id">Value for the ID column.</param>
        private void AddImageToDB(byte[] imageData, int id)
        {
            using (SqlConnection connection = new SqlConnection(connectionString))
            {
                // A freshly constructed connection is always closed.
                connection.Open();
                using (SqlCommand cmd = connection.CreateCommand())
                {
                    cmd.CommandText = "spAddImages";
                    cmd.CommandType = System.Data.CommandType.StoredProcedure;
                    cmd.Parameters.Add("@ID", System.Data.SqlDbType.Int).Value = id;
                    cmd.Parameters.Add("@ImageData", System.Data.SqlDbType.Image).Value = imageData;
                    cmd.ExecuteNonQuery();
                }
            }
        }
    }
}
How to use the code in your code
WordUtility wordUtility = new WordUtility();
// Extract every inline image of the document into memory streams...
List<MemoryStream> collection = wordUtility.GetImageFromFile("test.doc");
// ...and store them in the database.
wordUtility.SaveToDatabase(collection);
collection = null;

Conclusion
This article explained how to extract images from an MS Word document using the Microsoft Office managed (interop) library and the .NET Framework.