Introduction
Yesterday my friends asked a question on the forum: how can we extract an email address from a Word document? It is a nice question, so I would like to build a simple sample project for it. I have a sample line in a Word document like the following: "hi i m ashish and my id is aashish_ddd@gmail.com . thanks for reply". We need to extract the email address from that line, but it is not in a text file; it is in a Word file. First, we need to create a sample project using Visual Studio 2008. After creating the project, you need to add a reference in order to access Word files from .NET: right-click your project, select the Add Reference context menu item, and then select Microsoft.Office.Interop.Word. Then you need to write the following code in the Program.cs file. {codecitation class="brush: c#; gutter: true;" width="500px"} using System; using System.Collections.Generic; using System.Linq; using System.Text; using Microsoft.Office.Interop.Word; using System.Text.RegularExpressions;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample whose entry point runs <c>ReadWord</c> on a new instance.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point: creates a <c>Program</c> and calls its <c>ReadWord</c> method.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Program extractor = new Program();
            extractor.ReadWord();
        }
// Path of the Word document to scan. Declared as object because
// Documents.Open takes its arguments as "ref object".
object fileName = "D:\\d.doc";

/// <summary>
/// Opens the Word document named by <c>fileName</c>, extracts one candidate
/// e-mail address from each paragraph, keeps those accepted by
/// <c>IsEmail</c>, and writes them to the console, one per line.
/// The document is closed and Word is quit even when scanning fails.
/// </summary>
private void ReadWord()
{
    ApplicationClass appWord = new ApplicationClass();
    object nullobj = System.Reflection.Missing.Value;
    List<string> emailCollection = new List<string>();
    Document document = null;

    try
    {
        document = appWord.Documents.Open(
            ref fileName, ref nullobj, ref nullobj, ref nullobj,
            ref nullobj, ref nullobj, ref nullobj, ref nullobj,
            ref nullobj, ref nullobj, ref nullobj, ref nullobj,
            ref nullobj, ref nullobj, ref nullobj, ref nullobj);

        foreach (Microsoft.Office.Interop.Word.Paragraph objParagraph in document.Paragraphs)
        {
            // Read the paragraph being visited (not always the first one).
            string paragraphText = objParagraph.Range.Text;

            // A paragraph without '@' cannot hold an address; skip it here so
            // the extractor is never asked to slice around a missing '@'.
            if (paragraphText == null || paragraphText.IndexOf('@') < 0)
            {
                continue;
            }

            string emailaddress = EmailExtractot(paragraphText).TrimEnd();
            if (IsEmail(emailaddress))
            {
                emailCollection.Add(emailaddress);
            }
        }
    }
    finally
    {
        // Close the document and quit Word. The casts select the interop
        // methods rather than the same-named Close/Quit events.
        if (document != null)
        {
            ((_Document)document).Close(ref nullobj, ref nullobj, ref nullobj);
        }

        ((_Application)appWord).Quit(ref nullobj, ref nullobj, ref nullobj);
    }

    foreach (string emailaddres in emailCollection)
    {
        Console.WriteLine(emailaddres);
    }

    // Wait once, after everything is printed, so the console window stays open.
    Console.ReadLine();
}

/// <summary>
/// Anchored pattern used by <c>IsEmail</c>: a local part, '@', then either a
/// dotted-quad IPv4 literal or a dotted host name ending in 2-4 letters.
/// </summary>
// The IPv4 branch must not contain literal spaces: the pattern is not used
// with RegexOptions.IgnorePatternWhitespace, so a space would have to appear
// in the address itself.
public const string MatchEmailPattern =
    @"^(([\w-]+\.)+[\w-]+|([a-zA-Z]{1}|[\w-]{2,}))@"
    + @"((([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\.([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\."
    + @"([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\.([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])){1}|"
    + @"([a-zA-Z]+[\w-]+\.)+[a-zA-Z]{2,4})$";
/// <summary>
/// Tells whether the given text matches <c>MatchEmailPattern</c>.
/// </summary>
/// <param name="email">Candidate text; may be null.</param>
/// <returns>
/// <c>true</c> when <paramref name="email"/> is non-null and matches the
/// pattern; otherwise <c>false</c>.
/// </returns>
public static bool IsEmail(string email)
{
    return email != null && Regex.IsMatch(email, MatchEmailPattern);
}
/// <summary>
/// Returns the whitespace-delimited token that surrounds the first '@' in
/// <paramref name="orginal"/>.
/// </summary>
/// <param name="orginal">Text to search; may be null or empty.</param>
/// <returns>
/// The token containing the first '@', without surrounding whitespace, or an
/// empty string when the input is null, empty, or contains no '@'.
/// </returns>
private static string EmailExtractot(string orginal)
{
    if (string.IsNullOrEmpty(orginal))
    {
        return string.Empty;
    }

    int atIndex = orginal.IndexOf('@');
    if (atIndex < 0)
    {
        // No '@' means there is nothing to extract; do not slice with -1.
        return string.Empty;
    }

    // Walk left to the start of the token (or the start of the text).
    int start = atIndex;
    while (start > 0 && !char.IsWhiteSpace(orginal[start - 1]))
    {
        start--;
    }

    // Walk right to the end of the token. Any whitespace ends it, and so does
    // the end of the text, so an address at the very end is not truncated.
    int end = atIndex + 1;
    while (end < orginal.Length && !char.IsWhiteSpace(orginal[end]))
    {
        end++;
    }

    return orginal.Substring(start, end - start);
}
} } {/codecitation} That's all; just build the application and run it. Please send your comments to info@codegain.com. Thank you, RRaveen |