I'm encountering an issue where a service is exiting on errors that should never propagate up.
I built a microservice manager (.NET as the local environment doesnt support .NET Core and some of its native microservice abilities)
Built in VS2019 targeting .NET 4.5.2 (I know, but this is the world we live in)
For the microservice manager, it is built and installed as a windows service. Entry looks like this (#if/#else was for testing locally, it is working as intended when registered as a windows service)
Program.cs (Entry point)
` static class Program
{
/// <summary>
/// The main entry point for the application.
/// </summary>
static void Main()
{
#if DEBUG
Scheduler myScheduler = new Scheduler();
myScheduler.OnDebug();
System.Threading.Thread.Sleep(System.Threading.Timeout.Infinite);
#else
ServiceBase[] ServicesToRun;
ServicesToRun = new ServiceBase[]
{
new Scheduler()
};
ServiceBase.Run(ServicesToRun);
#endif
}
}`
Scheduler.cs
//(confidential code hidden)
`private static readonly Configuration config = Newtonsoft.Json.JsonConvert.DeserializeObject<Configuration>(
File.ReadAllText(configFilePath)
);
public Scheduler()
{
//InitializeComponent(); //windows service, doesnt need UI components initialized
}
public void OnDebug()
{
OnStart(null); //triggers when developing locally
}
protected override async void OnStart(string[] args)
{
try
{
logger.Log($#"Service manager starting...");
logger.Log($#"Finding external services... {config.services.Count} services found.");
foreach (var service in config.services)
{
try
{
if (service.disabled)
{
logger.Log(
$#"Skipping {service.name}: disabled=true in Data Transport Service's appSettings.json file");
continue;
}
logger.Queue($#"Starting: {service.name}...");
string serviceLocation = service.useRelativePath
? Path.Combine(assemblyLocation, service.path)
: service.path;
var svc = Assembly.LoadFrom(serviceLocation);
var assemblyType = svc.GetType($#"{svc.GetName().Name}.Program");
var methodInfo = assemblyType.GetMethod("Main");
var instanceObject = Activator.CreateInstance(assemblyType, new object[0]);
methodInfo.Invoke(instanceObject, new object[0]);
logger.Queue(" Running").Send("");
}
catch (TargetInvocationException ex)
{
logger.Queue(" Failed").Send("");
logger.Log("an error occurred", LOG.LEVEL.CRITICAL, ex);
}
catch (Exception ex)
{
logger.Queue(" Failed").Send("");
logger.Log("an error occurred", LOG.LEVEL.CRITICAL, ex);
}
}
logger.Log("Finished loading services.");
}
catch (Exception ex)
{
logger.Log($#"Critical error encountered", LOG.LEVEL.CRITICAL, ex);
}
}
Microservice:
public [Confidential]()
{
if (currentProfile == null)
{
var errMsg =
$#"Service not loaded, Profile not found, check appSettings.currentProfile: '{config.currentProfile}'";
logger.Log(errMsg,severity: LOG.LEVEL.CRITICAL);
throw new SettingsPropertyNotFoundException(errMsg);
}
if (currentProfile.disabled)
{
var errMsg = $#"Service not loaded: {config.serviceName}, Service's appSettings.currentProfile.disabled=true";
logger.Log(errMsg,LOG.LEVEL.WARN);
throw new ArgumentException(errMsg);
}
logger.Log($#"Loading: '{config.serviceName}' with following configuration:{Environment.NewLine}{JsonConvert.SerializeObject(currentProfile,Formatting.Indented)}");
logger.Queue($#"Encrypting config file passwords...");
bool updateConfig = false;
foreach (var kafkaSource in config.dataTargets)
{
if (!kafkaSource.password.IsEncrypted())
{
updateConfig = true;
logger.Queue($#"%tabEncrypting: {kafkaSource.name}");
kafkaSource.password = kafkaSource.password.Encrypt();
}
else
{
logger.Queue($#"%tabAlready encrypted: {kafkaSource.name}");
}
}
logger.Send(Environment.NewLine);
if (updateConfig)
{
File.WriteAllText(
configFilePath,
Newtonsoft.Json.JsonConvert.SerializeObject(config));
}
var _source = config.dataSources.FirstOrDefault(x=>x.name==currentProfile.dataSource);
var _target = config.dataTargets.FirstOrDefault(x => x.name == currentProfile.dataTarget);
source = new Connectors.Sql(logger,
_source?.name,
_source?.connectionString,
_source.pollingInterval,
_source.maxRowsPerSelect,
_source.maxRowsPerUpdate);
target = new Connectors.KafkaProducer(logger)
{
bootstrapServers = _target?.bootstrapServers,
name = _target?.name,
password = _target?.password.Decrypt(),
sslCaLocation = Path.Combine(assemblyLocation,_target?.sslCaLocation),
topic = _target?.topic,
username = _target?.username
};
Start();
}
public void Start()
{
Timer timer = new Timer();
try
{
logger.Log($#"SQL polling interval: {source.pollingInterval} seconds");
timer.Interval = source.pollingInterval * 1000;
timer.Elapsed += new ElapsedEventHandler(this.OnTimer);
timer.Start();
if (currentProfile.executeOnStartup)
Run();
}
catch (Exception ex)
{
var sb = new StringBuilder();
sb.AppendLine($#"Critical error encountered loading external service: {config.serviceName}.");
if (!timer.Enabled)
sb.AppendLine($#"service unloaded - Schedule not started!");
else
sb.AppendLine($#"service appears to be loaded and running on schedule.");
logger.Log(sb.ToString(), LOG.LEVEL.CRITICAL, ex);
}
}
public void OnTimer(object sender, ElapsedEventArgs e)
{
try
{
Run();
}
catch (Exception ex)
{
logger.Log($#"Critical error during scheduled run on service: {config.serviceName}.", LOG.LEVEL.CRITICAL, ex);
}
}
public async void Run()
{
//Get new alarm events from SQL source
logger.Queue("Looking for new alarms...");
var rows = await GetNewEvents();`
The exception occurred during the GetNewEvents method, which attempted to open a SqlConnection to a SQL server that was unavailable due to network issues, that method intentionally throws an exception, which should throw up to OnTimer, where it gets caught, logged, and the timer keeps running. During development/testing, I used invalid credentials, bad connection string, etc and simulated this type of error and it worked as expected, logged the error, kept running. For some reason recently, that error is not caught in OnTimer, it propagates up, where it should be caught by Start (but isn't), after that it should be caught by the parent service manager which is entirely wrapped in a try/catch with no throw's, and above that (because their could be multiple microservices managed by that service) the entry point to the service manager is wrapped in try/catch with no throws, all for isolation from microservice errors. For some reason though, now, the error from a VERY downstream application is propagating all the way up.
Typically, this code runs 24/7 no issues, the microservice it is loading from the config file launches and runs fine. The entry into that specific microservice starts with a try {...} catch (Exception ex) {...} block.
The concept is to have a microservice manager than can launch a number of microservices without having to install all of them as windows services, and have some level of configuration driven by a config file that dictates how the main service runs.
The microservice represented here opens a SQL connection, reads data, performs business logic, publishes results to Kafka, it does this on a polling interval dictated by the config file contained in the microservice. As stated above, its ran for months without issue.
Recently, I noticed the main microservice manager service was not running on the windows server, I investigated the Server Application Logs and found a "Runtime Error" that essentially stated the microservice, while attempting to connect to sql, failed (network issue) and caused the entire microservice manager to exit. To my understanding, they way I'm launching the microservice should isolate it from the main service manager app. Additionally, the main service manager app is wrapped in a very generic try catch block. The entry point to the micro service itself is wrapped in a try catch, and almost every component in the microservice is wrapped in try / catch per business need. The scenario that faulted (cant connect to sql) intentionally throws an error for logging purposes, but should be caught by the immediate parent try/catch, which does not propagate or re-throw, only logs the error to a txt file and the windows server app log.
How is it that this exception is bubbling up through isolation points and causing the main service to fault and exit? I tested this extensively during development and prior to release, this exact scenario being unable to connect to sql, and it generated the correct log entry, and tried again on the next polling cycle as expected.
I haven't tried any other approaches as yet, as I feel they would be band-aid fixes as best as I dont understand why the original design is suddenly failing. The server hasn't changed, no patching/security updates/etc.
From the server Application Log:
Application: DataTransportService.exe
Framework Version: v4.0.30319
Description: The process was terminated due to an unhandled exception.
Exception Info: System.Exception
at Connectors.SqlHelper.DbHelper+d__13`1[[System.__Canon, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]].MoveNext()
at System.Runtime.CompilerServices.TaskAwaiter.ThrowForNonSuccess(System.Threading.Tasks.Task)
at System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
at IntelligentAlarms.IntelligentAlarm+d__14.MoveNext()
at System.Runtime.CompilerServices.TaskAwaiter.ThrowForNonSuccess(System.Threading.Tasks.Task)
at System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
at System.Runtime.CompilerServices.TaskAwaiter.ValidateEnd(System.Threading.Tasks.Task)
at IntelligentAlarms.IntelligentAlarm+d__12.MoveNext()
at System.Runtime.CompilerServices.AsyncMethodBuilderCore+<>c.b__6_1(System.Object)
at System.Threading.QueueUserWorkItemCallback.WaitCallback_Context(System.Object)
at System.Threading.ExecutionContext.RunInternal(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)
at System.Threading.ExecutionContext.Run(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)
at System.Threading.QueueUserWorkItemCallback.System.Threading.IThreadPoolWorkItem.ExecuteWorkItem()
at System.Threading.ThreadPoolWorkQueue.Dispatch()
at System.Threading._ThreadPoolWaitCallback.PerformWaitCallback()
I am trying to simply get a list of all the Private Queues on the Server from my PC.
The error I am getting is "Access to Message Queuing system is denied."
I have given full access to (Everyone, NETWORK SERVICE and Anonymous Logon) the queue on the as there is only 1 queue....
Please see attached error and code below:-
try
{
MessageQueue[] queueList =
MessageQueue.GetPrivateQueuesByMachine("xxx.xxx.x.xxx");//Error
occurs here
foreach (MessageQueue queueItem in queueList)
{
Console.WriteLine(queueItem.Path);
}
return created;
}
catch (MessageQueueException m)
{
Console.WriteLine(m.Message + m.MessageQueueErrorCode);
}
catch (SystemException s)
{
Console.WriteLine(s.Message + s.StackTrace);
}
catch (Exception e)
{
Console.WriteLine(e);
throw;
}
On the computer manager of the target machine ("xxx.xxx.x.xxx" in your example) select "Services and applications" and then "Message Queuing". Right click on "Message Queuing", go to secuirty tab and add your account with full control. Thsi should give you access to the private queues on that machine
How can i catch the exception that occurs when starting a windows service. I am unable to get the exception here in my below code even though i am throwing exception in the Onstart() method of the service.
public class InterOpIntegrationWinService : ServiceBase
{
protected override void OnStart(string[] args)
{
throw new InvalidOperationException(message);
}
}
Calling thread code
try
{
using (ServiceController controller = new ServiceController())
{
controller.ServiceName = objServiceConfig.ServiceName;
controller.Start();
System.Windows.Forms.Application.DoEvents();
//controller.WaitForStatus(ServiceControllerStatus.Running, new TimeSpan(0, 0, 15));
//controller.WaitForStatus(ServiceControllerStatus.Running);
//if (!string.IsNullOrEmpty(LogUtilities.ServiceOnStartException))
//{
// MessageBox.Show("Error with starting service : " + LogUtilities.ServiceOnStartException);
// LogUtilities.ServiceOnStartException = string.Empty;
//}
}
}
catch (System.InvalidOperationException InvOpExcep)
{
DisplayError(InvOpExcep.Message);
LogUtilities.DisplayMessage("Failed to start service. " + LogUtilities.ServiceOnStartException, InvOpExcep);
LogUtilities.ServiceOnStartException = string.Empty;
}
catch (Exception ex)
{
DisplayError(ex.Message);
LogUtilities.DisplayMessage("Failed to start service. " + LogUtilities.ServiceOnStartException, ex);
LogUtilities.ServiceOnStartException = string.Empty;
}
i check for application license in the onstart() method and throws a licensing error if it fails. i want this to shared to my calling thread so i could show the message in a DialogBox. Any ideas of how to do this if i cannot handle the exceptions in my calling process.
Separate your service into (at least) two components - a component that deals with IPC in some form (e.g. Remoting, WCF endpoint, REST service, etc) and (one or more) components that do its actual job.
If the licensing check fails, don't start the other components - but do still start the component that offers IPC. After starting your service (which should now always at least start), you forms-based application can connect to the service and (through whatever means you want) determine that the service is currently refusing to provide any functionality due to a failed licensing check.
I have a Windows services running in .NET 4.5. Everything works fine. However, when my service encounters a SqlException, it hangs (turns into a zombie).
I have a timer (System.Timers) that calls process. In process, locate cmd.ExecuteReader(). If I remove EXECUTE permissions from the stored procedure, I receive a SqlException (as expected). When this happens, the service simply hangs.
I would have expected one of the try {} catch blocks to capture the exception and exit the method gracefully. However, the system appears to hang on this call. I had a number of Trace statements in the code. I removed them so it would be easier to read.
private void TimerForNotification_Elapsed(object sender, System.Timers.ElapsedEventArgs e)
{
TimerForNotification.Stop();
int count = new GetSMSNotifications().process();
TimerForNotification.Start();
}
public int process()
{
int count = 0;
// Get the ConnectionStrings collection.
ConnectionStringSettings connections = ConfigurationManager.ConnectionStrings["DE_OLTP"];
try
{
using (SqlConnection conn = new SqlConnection(connections.ConnectionString))
{
conn.Open();
SqlCommand cmd = new SqlCommand("[dbo].[get_SMSToSend]", conn);
cmd.CommandType = System.Data.CommandType.StoredProcedure;
try
{
SqlDataReader dr = cmd.ExecuteReader(System.Data.CommandBehavior.CloseConnection);
while (dr.Read())
{
// increment counter
count++;
string destinationAddress = Convert.ToString(dr[dr.GetOrdinal("DestinationAddress")]);
string alertMessage = Convert.ToString(dr[dr.GetOrdinal("Content")]);
// Send out the notification
sendPush(destinationAddress, alertMessage);
}
dr.Close();
}
catch (SqlException se)
{
DELog.Log.Error(se);
}
}
}
catch (Exception ex)
{
DELog.Log.Error(ex);
}
return count;
}
Interestingly, I created a console app that calls the above method and the try {} catch block works as expected.
I don't see any unhandled exceptions in the event log.
Thoughts?
Problem solved. The reason why it appeared my service was hanging is because I was a missing a reference to Entity.Framework.dll. When the service ran into an exception, the EF dll could not be found. I use the EF in my logging layer.
I was able to discover this issue by installing my service and then ATTACHing to the run process.
Try these things:
Add trace line to these:
TimerForNotification.Stop();
try
{
int count = new GetSMSNotifications().process();
}
catch (Exception ex)
{
// trace line here with ex.ToString().
}
TimerForNotification.Start();
// trace line here stating, i started at DateTime.Now;
Remove execute permissions and see if the above code generates a trace line.
If it does, then the exception details will show up. (even though your method looks very safe) Fix it accordingly.
If the catch doesn't trace, then you should see trace lines for the post start trace which means the service is working as expected.
Have a windows service that listens to a msmq. In the OnStart method is have this
protected override void OnStart(string[] args)
{
try
{
_queue = new MessageQueue(_qPath);//this part works as i had logging before and afer this call
//Add MSMQ Event
_queue.ReceiveCompleted += new ReceiveCompletedEventHandler(queue_ReceiveCompleted);//this part works as i had logging before and afer this call
_queue.BeginReceive();//This is where it is failing - get a null reference exception
}
catch(Exception ex)
{
EventLogger.LogEvent(EventSource, EventLogType, "OnStart" + _lineFeed +
ex.InnerException.ToString() + _lineFeed + ex.Message.ToString());
}
}
where
private MessageQueue _queue = null;
This works on my machine but when deployed to a windows 2003 server and running as Network service account, it fails
Exception recvd:
Service cannot be started. System.NullReferenceException: Object reference not set to an instance of an object.
at MYService.Service.OnStart(String[] args)
at System.ServiceProcess.ServiceBase.ServiceQueuedMainCallback(Object state)
Solved:
Turned out that the Q that i set up, I had to explicitly add Network Service account to it under security tab
You're seeing that particular exception because you're calling ex.InnerException.ToString(). The InnerException property is not always populated (in fact, it frequently isn't, nor should it be).
Your root problem is likely that the Network Service account doesn't have permissions to access the queue (in this case, read from it).
Here's some code that will help you get the actual error in your event log:
catch(Exception ex)
{
Exception e = ex;
StringBuilder message = new StringBuilder();
while(e != null)
{
if(message.Length > 0) message.AppendLine("\nInnerException:");
message.AppendLine(e.ToString());
e = e.InnerException;
}
EventLogger.LogEvent(EventSource, EventLogType, "OnStart" + _lineFeed +
message.ToString());
}