I'm encountering an issue where a service is exiting on errors that should never propagate up.
I built a microservice manager (.NET as the local environment doesnt support .NET Core and some of its native microservice abilities)
Built in VS2019 targeting .NET 4.5.2 (I know, but this is the world we live in)
For the microservice manager, it is built and installed as a windows service. Entry looks like this (#if/#else was for testing locally, it is working as intended when registered as a windows service)
Program.cs (Entry point)
` static class Program
{
/// <summary>
/// The main entry point for the application.
/// </summary>
static void Main()
{
#if DEBUG
Scheduler myScheduler = new Scheduler();
myScheduler.OnDebug();
System.Threading.Thread.Sleep(System.Threading.Timeout.Infinite);
#else
ServiceBase[] ServicesToRun;
ServicesToRun = new ServiceBase[]
{
new Scheduler()
};
ServiceBase.Run(ServicesToRun);
#endif
}
}`
Scheduler.cs
//(confidential code hidden)
`private static readonly Configuration config = Newtonsoft.Json.JsonConvert.DeserializeObject<Configuration>(
File.ReadAllText(configFilePath)
);
public Scheduler()
{
//InitializeComponent(); //windows service, doesnt need UI components initialized
}
public void OnDebug()
{
OnStart(null); //triggers when developing locally
}
protected override async void OnStart(string[] args)
{
try
{
logger.Log($#"Service manager starting...");
logger.Log($#"Finding external services... {config.services.Count} services found.");
foreach (var service in config.services)
{
try
{
if (service.disabled)
{
logger.Log(
$#"Skipping {service.name}: disabled=true in Data Transport Service's appSettings.json file");
continue;
}
logger.Queue($#"Starting: {service.name}...");
string serviceLocation = service.useRelativePath
? Path.Combine(assemblyLocation, service.path)
: service.path;
var svc = Assembly.LoadFrom(serviceLocation);
var assemblyType = svc.GetType($#"{svc.GetName().Name}.Program");
var methodInfo = assemblyType.GetMethod("Main");
var instanceObject = Activator.CreateInstance(assemblyType, new object[0]);
methodInfo.Invoke(instanceObject, new object[0]);
logger.Queue(" Running").Send("");
}
catch (TargetInvocationException ex)
{
logger.Queue(" Failed").Send("");
logger.Log("an error occurred", LOG.LEVEL.CRITICAL, ex);
}
catch (Exception ex)
{
logger.Queue(" Failed").Send("");
logger.Log("an error occurred", LOG.LEVEL.CRITICAL, ex);
}
}
logger.Log("Finished loading services.");
}
catch (Exception ex)
{
logger.Log($#"Critical error encountered", LOG.LEVEL.CRITICAL, ex);
}
}
Microservice:
public [Confidential]()
{
if (currentProfile == null)
{
var errMsg =
$#"Service not loaded, Profile not found, check appSettings.currentProfile: '{config.currentProfile}'";
logger.Log(errMsg,severity: LOG.LEVEL.CRITICAL);
throw new SettingsPropertyNotFoundException(errMsg);
}
if (currentProfile.disabled)
{
var errMsg = $#"Service not loaded: {config.serviceName}, Service's appSettings.currentProfile.disabled=true";
logger.Log(errMsg,LOG.LEVEL.WARN);
throw new ArgumentException(errMsg);
}
logger.Log($#"Loading: '{config.serviceName}' with following configuration:{Environment.NewLine}{JsonConvert.SerializeObject(currentProfile,Formatting.Indented)}");
logger.Queue($#"Encrypting config file passwords...");
bool updateConfig = false;
foreach (var kafkaSource in config.dataTargets)
{
if (!kafkaSource.password.IsEncrypted())
{
updateConfig = true;
logger.Queue($#"%tabEncrypting: {kafkaSource.name}");
kafkaSource.password = kafkaSource.password.Encrypt();
}
else
{
logger.Queue($#"%tabAlready encrypted: {kafkaSource.name}");
}
}
logger.Send(Environment.NewLine);
if (updateConfig)
{
File.WriteAllText(
configFilePath,
Newtonsoft.Json.JsonConvert.SerializeObject(config));
}
var _source = config.dataSources.FirstOrDefault(x=>x.name==currentProfile.dataSource);
var _target = config.dataTargets.FirstOrDefault(x => x.name == currentProfile.dataTarget);
source = new Connectors.Sql(logger,
_source?.name,
_source?.connectionString,
_source.pollingInterval,
_source.maxRowsPerSelect,
_source.maxRowsPerUpdate);
target = new Connectors.KafkaProducer(logger)
{
bootstrapServers = _target?.bootstrapServers,
name = _target?.name,
password = _target?.password.Decrypt(),
sslCaLocation = Path.Combine(assemblyLocation,_target?.sslCaLocation),
topic = _target?.topic,
username = _target?.username
};
Start();
}
public void Start()
{
Timer timer = new Timer();
try
{
logger.Log($#"SQL polling interval: {source.pollingInterval} seconds");
timer.Interval = source.pollingInterval * 1000;
timer.Elapsed += new ElapsedEventHandler(this.OnTimer);
timer.Start();
if (currentProfile.executeOnStartup)
Run();
}
catch (Exception ex)
{
var sb = new StringBuilder();
sb.AppendLine($#"Critical error encountered loading external service: {config.serviceName}.");
if (!timer.Enabled)
sb.AppendLine($#"service unloaded - Schedule not started!");
else
sb.AppendLine($#"service appears to be loaded and running on schedule.");
logger.Log(sb.ToString(), LOG.LEVEL.CRITICAL, ex);
}
}
public void OnTimer(object sender, ElapsedEventArgs e)
{
try
{
Run();
}
catch (Exception ex)
{
logger.Log($#"Critical error during scheduled run on service: {config.serviceName}.", LOG.LEVEL.CRITICAL, ex);
}
}
public async void Run()
{
//Get new alarm events from SQL source
logger.Queue("Looking for new alarms...");
var rows = await GetNewEvents();`
The exception occurred during the GetNewEvents method, which attempted to open a SqlConnection to a SQL server that was unavailable due to network issues, that method intentionally throws an exception, which should throw up to OnTimer, where it gets caught, logged, and the timer keeps running. During development/testing, I used invalid credentials, bad connection string, etc and simulated this type of error and it worked as expected, logged the error, kept running. For some reason recently, that error is not caught in OnTimer, it propagates up, where it should be caught by Start (but isn't), after that it should be caught by the parent service manager which is entirely wrapped in a try/catch with no throw's, and above that (because their could be multiple microservices managed by that service) the entry point to the service manager is wrapped in try/catch with no throws, all for isolation from microservice errors. For some reason though, now, the error from a VERY downstream application is propagating all the way up.
Typically, this code runs 24/7 no issues, the microservice it is loading from the config file launches and runs fine. The entry into that specific microservice starts with a try {...} catch (Exception ex) {...} block.
The concept is to have a microservice manager than can launch a number of microservices without having to install all of them as windows services, and have some level of configuration driven by a config file that dictates how the main service runs.
The microservice represented here opens a SQL connection, reads data, performs business logic, publishes results to Kafka, it does this on a polling interval dictated by the config file contained in the microservice. As stated above, its ran for months without issue.
Recently, I noticed the main microservice manager service was not running on the windows server, I investigated the Server Application Logs and found a "Runtime Error" that essentially stated the microservice, while attempting to connect to sql, failed (network issue) and caused the entire microservice manager to exit. To my understanding, they way I'm launching the microservice should isolate it from the main service manager app. Additionally, the main service manager app is wrapped in a very generic try catch block. The entry point to the micro service itself is wrapped in a try catch, and almost every component in the microservice is wrapped in try / catch per business need. The scenario that faulted (cant connect to sql) intentionally throws an error for logging purposes, but should be caught by the immediate parent try/catch, which does not propagate or re-throw, only logs the error to a txt file and the windows server app log.
How is it that this exception is bubbling up through isolation points and causing the main service to fault and exit? I tested this extensively during development and prior to release, this exact scenario being unable to connect to sql, and it generated the correct log entry, and tried again on the next polling cycle as expected.
I haven't tried any other approaches as yet, as I feel they would be band-aid fixes as best as I dont understand why the original design is suddenly failing. The server hasn't changed, no patching/security updates/etc.
From the server Application Log:
Application: DataTransportService.exe
Framework Version: v4.0.30319
Description: The process was terminated due to an unhandled exception.
Exception Info: System.Exception
at Connectors.SqlHelper.DbHelper+d__13`1[[System.__Canon, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]].MoveNext()
at System.Runtime.CompilerServices.TaskAwaiter.ThrowForNonSuccess(System.Threading.Tasks.Task)
at System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
at IntelligentAlarms.IntelligentAlarm+d__14.MoveNext()
at System.Runtime.CompilerServices.TaskAwaiter.ThrowForNonSuccess(System.Threading.Tasks.Task)
at System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
at System.Runtime.CompilerServices.TaskAwaiter.ValidateEnd(System.Threading.Tasks.Task)
at IntelligentAlarms.IntelligentAlarm+d__12.MoveNext()
at System.Runtime.CompilerServices.AsyncMethodBuilderCore+<>c.b__6_1(System.Object)
at System.Threading.QueueUserWorkItemCallback.WaitCallback_Context(System.Object)
at System.Threading.ExecutionContext.RunInternal(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)
at System.Threading.ExecutionContext.Run(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)
at System.Threading.QueueUserWorkItemCallback.System.Threading.IThreadPoolWorkItem.ExecuteWorkItem()
at System.Threading.ThreadPoolWorkQueue.Dispatch()
at System.Threading._ThreadPoolWaitCallback.PerformWaitCallback()
I have a Windows Service that I successfully deploys, successfully works when debugging, but crashes when a file is added to the monitored directory.
I thought it was an issue with my impersonator being used between the OnStart and InputOnChanged, but the crash still happens when I run the service under my own domain user.
I have EventLog set to write to it's own application source, but none of my WriteEntrys are called except the one in the OnStart function. I've been trying different tweaks and feel like I need another set of eyes to see something i'm not:
protected override void OnStart(string[] args)
{
//using(Impersonator context = new Impersonator("XXXXX", "XXXXXXXX", "XXXXXXXXXX"))
//{
try
{
this.fileWatcherService = new FileSystemWatcher(baseFilePath, "*.txt")
{
NotifyFilter = NotifyFilters.LastWrite
};
fileWatcherService.Changed += InputOnChanged;
fileWatcherService.EnableRaisingEvents = true;
eventLog.WriteEntry("XXXX-XXXXX-Service Started");
}
catch (Exception ex)
{
eventLog.WriteEntry($"{baseFilePath} was not accessible to monitor because {ex.Message}", EventLogEntryType.Error);
}
}
protected void InputOnChanged(object source, FileSystemEventArgs e)
{
if (e.ChangeType == WatcherChangeTypes.Changed)
{
eventLog.WriteEntry($"Change Detected - File {e.Name}", EventLogEntryType.Information);
try
{
fileWatcherService.EnableRaisingEvents = false;
eventLog.WriteEntry("Starting process for file: " + e.Name);
if (!File.Exists(e.FullPath))
{
eventLog.WriteEntry($"{e.Name} was not accessible", EventLogEntryType.Error);
}
//Copy File to backup copy before formatting
File.Copy(e.FullPath
, Path.Combine(#"\\XXXXXX\XXXXXX\XXXXXXXX\XXXXXXXXXX XXX XXXXXXXXX\XXX\XXXX\XXXX\BackupFiles", GetBackupFileName(e.Name))
, false);
//Save formatted file to directory
List<string> lines = System.IO.File.ReadAllLines(e.FullPath).ToList();
File.WriteAllText(Path.Combine(#"\\XXXXXX\XXXXXX\XXXXXXXX\XXXXXXXXXX XXX XXXXXXXXX\XXX\XXXX\XXXX\FormattedFiles", GetFormattedFileName(e.Name))
, CSVFormatService.FormatLines(lines));
//Remove file from base path to prevent re-processing
File.Delete(e.FullPath);
eventLog.WriteEntry($"Successfully moved {e.FullPath}", EventLogEntryType.Information);
}
catch (Exception ex)
{
eventLog.WriteEntry("XXXX-XXXXX-Service exception: " + ex.Message, EventLogEntryType.Error);
}
finally
{
fileWatcherService.EnableRaisingEvents = true;
}
}
}
Would expect eventLog.WriteEntry("Starting process for file: " + e.Name); to update the Application log at least because that is before any attempt to touch a file, but I don't see that in the log. However, the service runs until I place a test file in the monitored directory, and then crashes with a unhandled exception of file does not exist
When building out these services, make sure you reference a shared project correctly. This issue was caused by adding a reference to a class library to the project, but the .dll was missing when deploying the service. So when the service tried to access the .dll to process data a FileNotFound exception was being thrown. This also make sense as to why the exception was marked as unhandled.
I am trying to get application pool status from within a web application. Application pool I am interested in is "ABC" but when I check for it by name I get nothing and when I inspect the available pools by name (i.e. manager.ApplicationPools) they show as Clr4ClassicAppPool, Clr4IntegratedAppPool, ... so I never find a match.
This is what I am using
public static int GetAppPoolStatus(string sAppPoolName)
{
int iRet = -1;
try
{
using (ServerManager manager = new ServerManager())
{
ApplicationPool appPool = manager.ApplicationPools.FirstOrDefault(ap => ap.Name == sAppPoolName);
if (appPool != null)
{
//Get the current state of the app pool
iRet = (int)appPool.State; // 0: Starting, 1: Started, 2: Stopping, 3: Stopped
}
else
{
}
}
}
catch (Exception ex)
{ }
return iRet;
}
The GAC version (7.9.0.0) was part of IIS Express, and is resolved by MSBuild when compiling your project if your project file does not explicitly point to %SystemRoot%\system32\inetsrv\Microsoft.Web.Administration.dll.
The ultimate solution (if not to uninstall IIS Express) is to add a reference explicitly to %SystemRoot%\system32\inetsrv\Microsoft.Web.Administration.dll to consume the right metadata at compile time. It has side effects, but still a reliable way. And at runtime, use assembly redirection to stick to version 7.0.0.0.
More tips can be found in this post
I am trying to simply get a list of all the Private Queues on the Server from my PC.
The error I am getting is "Access to Message Queuing system is denied."
I have given full access to (Everyone, NETWORK SERVICE and Anonymous Logon) the queue on the as there is only 1 queue....
Please see attached error and code below:-
try
{
MessageQueue[] queueList =
MessageQueue.GetPrivateQueuesByMachine("xxx.xxx.x.xxx");//Error
occurs here
foreach (MessageQueue queueItem in queueList)
{
Console.WriteLine(queueItem.Path);
}
return created;
}
catch (MessageQueueException m)
{
Console.WriteLine(m.Message + m.MessageQueueErrorCode);
}
catch (SystemException s)
{
Console.WriteLine(s.Message + s.StackTrace);
}
catch (Exception e)
{
Console.WriteLine(e);
throw;
}
On the computer manager of the target machine ("xxx.xxx.x.xxx" in your example) select "Services and applications" and then "Message Queuing". Right click on "Message Queuing", go to secuirty tab and add your account with full control. Thsi should give you access to the private queues on that machine
I install and start a service with my C# console application.
Installing the service using WinAPI:
try
{
//Open sc manager
IntPtr sc_handle = OpenSCManager(null, null, SC_MANAGER_CREATE_SERVICE);
if (sc_handle.ToInt32() != 0)
{
//Create service
IntPtr sv_handle = CreateService(sc_handle, svcName, svcDispName, SERVICE_ALL_ACCESS, SERVICE_WIN32_OWN_PROCESS, SERVICE_AUTO_START, SERVICE_ERROR_NORMAL, svcPath, null, 0, null, null, null);
//If failed
if (sv_handle.ToInt32() == 0)
{
CloseServiceHandle(sc_handle);
return false;
}
else //If succeeded
{
CloseServiceHandle(sv_handle);
CloseServiceHandle(sc_handle);
return true;
}
}
else
return false;
}
catch (Exception e)
{
throw e;
}
Starting the service:
//Start the newly installed service
try
{
System.ServiceProcess.ServiceController newService = GetService(application.StartupName);
//If service is stopped or stopping
if (newService.Status == System.ServiceProcess.ServiceControllerStatus.StopPending ||
newService.Status == System.ServiceProcess.ServiceControllerStatus.Stopped)
{
newService.Start();
newService.WaitForStatus(System.ServiceProcess.ServiceControllerStatus.Running, TimeSpan.FromMilliseconds(300.0));
}
return true;
}
catch (Exception e)
{
return false;
}
When setting up a service I install and start it.
When I do this for a very simple service (which does nothing but log its events to a text file), it fails the first time when trying to start the service.
It is the call to WaitForStatus which fails (timeout error).
If I uninstall the service (sc delete servicename), delete the file it logged too and install/start the service again under the same OR a different name, it works fine every time.
If I uninstall the service and search the registry for the service name, there are no results.
I've tried this troubleshooting on 3 different machines (windows 8 and windows 7). I have no other machines to test with and cannot recreate the problem. I even did a system restore to a point BEFORE installing/starting the service for the first time and it still worked without error.
Anyone got any ideas?