/*****************************************************************************/
/*
                                  lang.c

Loads and processes the language files.


NOTE TO LANGUAGE FILE AUTHORS
-----------------------------
Messages can be used in all sorts of contexts, particularly inside string
literal quotes - both single and double.  It is therefore necessary to
substitute the HTML entities &quot;, &lsquo;, &rsquo;, etc., for anything that
might be misinterpreted as C-language or JavaScript code quotes.


COPYRIGHT
---------
Copyright (C) 2005-2024 Mark G.Daniel
This program, comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it under the
conditions of the GNU GENERAL PUBLIC LICENSE, version 3, or any later version.


VERSION HISTORY
---------------
27-JUN-2010  MGD  bugfix; LangLoad() when WATCHing array boundary
01-OCT-2007  MGD  LangOctalEntity() to octal-escape HTML entities for
                    use in JavaScript alert() and confirm() strings 
30-MAR-2006  MGD  LangLoad() in conjunction with LANGDEF.H used to provide
                    details on missing/extra entries in a language file
15-MAR-2006  MGD  LangSame() modify comparison so that the message can
                    contain HTML entities useful in some buttons (e.g. &#10;)
11-MAR-2006  MGD  LangLoad() convert unescaped newlines to spaces
01-FEB-2005  MGD  initial
*/

/*****************************************************************************/

#ifdef SOYMAIL_VMS_V7
#undef _VMS_V6_SOURCE
#define _VMS_V6_SOURCE
#undef __VMS_VER
#define __VMS_VER 70000000
#undef __CRTL_VER
#define __CRTL_VER 70000000
#endif

#pragma nomember_alignment

/* standard C header files */
#include <ctype.h>
#include <errno.h>
#include <stat.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <types.h>

/* VMS related header files */
#include <descrip.h>
#include <rmsdef.h>
#include <ssdef.h>
#include <stsdef.h>

/* application header file */
#include "soymail.h"
#include "lang.h"
#include "config.h"

#define FI_LI __FILE__, __LINE__

/* global storage */

/* this include provides a global storage array of required message names */
#include "langdef.h"

/* LangMsgCount: number of messages counted by LangLoad() */
/* LangTextLength: length reported by ReadFileIntoMemory() for the file text */
int  LangMsgCount,
     LangTextLength;

/* language file text; passed by address to ConfigTextParse() in LangLoad() */
char  *LangTextPtr;

/* each has enough spare space to load extra messages to report file errors */
#define LANG_MSG_EXTRA 32
/* parallel arrays; entry [n] of each is one message's name and its text */
char  *LangMsgNamePtr [LANG_MSG_TOTAL+LANG_MSG_EXTRA],
      *LangMsgValuePtr [LANG_MSG_TOTAL+LANG_MSG_EXTRA];

/* external storage */

extern BOOL  Debug,
             WatchEnabled;

extern CONFIG_DATA  SoyMailConfig;

/*****************************************************************************/
/*
The user's optioned language name is passed as a parameter (NULL or an empty
string selects the mandatory language, LANG_MANDATORY).  Load the language file
into memory as a series of newline-delimited strings.  Then leaving that in
memory parse the message names and message text and point at the start of each
from the respective message pointer array.  If the optioned language file is
not found provide a status message and fall back to the mandatory language,
English.  If that can't be found - give up and exit!  Any other error reading
a language file is fatal.

Within message text an unescaped newline becomes a space, while a
backslash-escaped newline is kept as a newline (with the backslash removed).

Unless the number of messages parsed equals LANG_MSG_TOTAL an error response is
generated and the process exits.  When WATCH is enabled the message names are
also compared against LangDefList[] (LANGDEF.H) and missing or unknown entries
are reported.
*/

void LangLoad (char *LangName)

{
   BOOL  WatchIsEnabled;
   int  lmidx, ldidx, status,
        DefCount, LangErrorCount, ParsedCount, SlotMax;
   char  *cptr, *sptr;
   char  FileName [256];

   /*********/
   /* begin */
   /*********/

   if (Debug)
      fprintf (stdout, "LangLoad() |%s|\n", LangName ? LangName : "(null)");

   cptr = LangName;
   if (!cptr || !*cptr) cptr = LANG_MANDATORY;

   /* first the optioned language file (if any) then fall-back to English */
   for (;;)
   {
      /* the precision keeps an over-long name from overrunning FileName[] */
      sprintf (FileName, "SOYMAIL_LANG:%.200s.TXT", cptr);
      if (WatchEnabled) WatchThis ("LANGUAGE source !AZ", FileName);
      status = ReadFileIntoMemory (FileName, &LangTextPtr, &LangTextLength);
      if (VMSok (status)) break;
      if (status != RMS$_FNF) ErrorExit (status, FI_LI);
      if (!strcmp (cptr, LANG_MANDATORY)) 
      {
         CgiLibResponseError (FI_LI, status, "English (mandatory) language.");
         exit (SS$_NORMAL);
      }
      /* provide a language not found status message */
      StatusMessage (FI_LI, 1, "Language &quot;%s&quot;: %s.",
                     cptr, SysGetMsg(status));
      cptr = LANG_MANDATORY;
   }

   /* don't need all those parse messages now we have langdef.h */
   WatchIsEnabled = WatchEnabled;
   WatchEnabled = FALSE;

   /* number of elements in each of the two message pointer arrays */
   SlotMax = LANG_MSG_TOTAL+LANG_MSG_EXTRA;

   LangMsgCount = lmidx = 0;

   while (*LangTextPtr)
   {
      cptr = ConfigTextParse (&LangTextPtr, &LangMsgNamePtr[lmidx],
                                            &LangMsgValuePtr[lmidx]);
      if (!cptr) break;

      /*
         Convert newlines to spaces unless escaped, in a single in-place pass.
         'cptr' reads and 'sptr' writes; they only diverge (and characters are
         only shuffled down) once a backslash has been dropped, so text without
         an escaped newline is not rewritten apart from the newline itself.
      */
      for (cptr = sptr = LangMsgValuePtr[lmidx]; *cptr; cptr++, sptr++)
      {
         if (cptr[0] == '\\' && cptr[1] == '\n')
         {
            /* escaped; step over the backslash and keep the newline */
            cptr++;
         }
         else
         if (*cptr == '\n')
         {
            *sptr = ' ';
            continue;
         }
         if (sptr != cptr) *sptr = *cptr;
      }
      if (sptr != cptr) *sptr = '\0';

      if (WatchIsEnabled && !LangMsgValuePtr[lmidx][0])
         WatchThis ("LANGUAGE empty [!AZ]", LangMsgNamePtr[lmidx]);

      LangMsgCount++;
      /* once the arrays are full any further entries reuse the final slot */
      if (lmidx < SlotMax-1) lmidx++;
   }

   WatchEnabled = WatchIsEnabled;

   /* the true number parsed is what gets reported and checked ... */
   ParsedCount = LangMsgCount;
   /* ... but nothing may ever index beyond the end of the pointer arrays */
   if (LangMsgCount > SlotMax) LangMsgCount = SlotMax;

   /* also perform the check and integrity report when watch is enabled */
   if (!WatchEnabled && ParsedCount == LANG_MSG_TOTAL) return;

   /**********************/
   /* check message file */
   /**********************/

   if (WatchEnabled)
   {
      LangErrorCount = 0;

      /*
         NOTE(review): the original scans LangDefList[] up to (but excluding)
         index LANG_MSG_TOTAL-1; LANGDEF.H is not visible from here so that
         bound has been retained - confirm it matches the list's real size.
      */
      DefCount = LANG_MSG_TOTAL-1;

      /* display messages that should be there and aren't */
      for (ldidx = 0; ldidx < DefCount; ldidx++)
      {
         for (lmidx = 0; lmidx < LangMsgCount; lmidx++)
            if (strsame (LangMsgNamePtr[lmidx], LangDefList[ldidx], -1))
               break;
         if (lmidx >= LangMsgCount)
         {
            LangErrorCount++;
            WatchThis ("LANGUAGE missing [!AZ]", LangDefList[ldidx]);
         }
      }
      /* display messages that shouldn't be there and are */
      for (lmidx = 0; lmidx < LangMsgCount; lmidx++)
      {
         for (ldidx = 0; ldidx < DefCount; ldidx++)
            if (strsame (LangMsgNamePtr[lmidx], LangDefList[ldidx], -1))
               break;
         /* must test against the loop's own bound or this can never fire */
         if (ldidx >= DefCount)
         {
            LangErrorCount++;
            WatchThis ("LANGUAGE unknown [!AZ]", LangMsgNamePtr[lmidx]);
         }
      }

      WatchThis ("LANGUAGE !UL/!UL messages, !UL error!%S",
                 ParsedCount, LANG_MSG_TOTAL, LangErrorCount);
   }

   if (ParsedCount != LANG_MSG_TOTAL)
   {
      CgiLibResponseError (FI_LI, SS$_BUGCHECK, "language file.");
      exit (SS$_NORMAL);
   }
}

/*****************************************************************************/
/*
Search the loaded message names for one that strsame() matches against the
'StringPtr' parameter and return a pointer to the corresponding message text.
If there is no such name return a pointer to a static buffer containing the
name enclosed in question marks (at most 61 characters of the name are used).
That buffer is overwritten by the next unsuccessful look-up.
*/

char* LangFor (char *StringPtr)

{
   static char  NotFoundBuffer [64];

   int  midx;
   char  *endptr, *nptr, *optr;

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "LangFor() |%s|\n", StringPtr);

   midx = 0;
   while (midx < LangMsgCount)
   {
      if (strsame (LangMsgNamePtr[midx], StringPtr, -1))
      {
         if (Debug) fprintf (stdout, "|%s|\n", LangMsgValuePtr[midx]);
         return (LangMsgValuePtr[midx]);
      }
      midx++;
   }

   /* not found; leave room for the closing question mark and the null */
   optr = NotFoundBuffer;
   endptr = NotFoundBuffer + sizeof(NotFoundBuffer)-2;
   *optr++ = '?';
   nptr = StringPtr;
   while (*nptr && optr < endptr) *optr++ = *nptr++;
   *optr++ = '?';
   *optr = '\0';

   return (NotFoundBuffer);
}

/*****************************************************************************/
/*
Compares the language string specified by 'MsgNamePtr' with the string
specified by 'StringPtr'.  As language strings can contain HTML entities, if
the straight comparison fails then it's necessary to de-entify the message text
(using CgiLibHtmlDeEntify()) and compare again.  Only entities permitted are
&lt;, &gt;, &amp;, &quot;, and numeric entities for characters greater than 127
decimal.  During that second comparison a carriage-return immediately before a
newline in 'StringPtr' is ignored, and a space in 'StringPtr' is accepted where
the message has a newline.

The first message with a matching name is used (the same as LangFor()).  If no
message has that name the comparison is made against "-?-".  If the comparison
string is NULL the match fails.  Returns TRUE on a match, otherwise FALSE.
*/

BOOL LangSame
(
char *MsgNamePtr,
char *StringPtr
)
{
   int  idx;
   char  *cptr, *lptr, *sptr, *zptr;
   char  dentBuf [256];

   /*********/
   /* begin */
   /*********/

   if (!StringPtr) return (FALSE);

   lptr = "-?-";
   for (idx = 0; idx < LangMsgCount; idx++)
   {
      if (!strsame (LangMsgNamePtr[idx], MsgNamePtr, -1)) continue;
      lptr = LangMsgValuePtr[idx];
      /* first match wins, consistent with LangFor() */
      break;
   }

   if (!strcmp (StringPtr, lptr))
   {
      if (WatchEnabled)
         WatchThis ("LANG TRUE \'!AZ\' \'!AZ\' \'!AZ\'",
                    MsgNamePtr, StringPtr, lptr);
      return (TRUE);
   }

   if (!strchr (lptr, '&'))
   {
      /* there are no entified characters in the message */
      if (WatchEnabled)
         WatchThis ("LANG FALSE \'!AZ\' \'!AZ\' \'!AZ\'",
                    MsgNamePtr, StringPtr, lptr);
      return (FALSE);
   }

   /* take a private copy of the message text to de-entify */
   zptr = (sptr = dentBuf) + sizeof(dentBuf)-1;
   for (cptr = lptr; *cptr && sptr < zptr; *sptr++ = *cptr++);
   /* it's only an overflow if some of the message remains uncopied */
   if (*cptr) ErrorExit (SS$_BUGCHECK, FI_LI);
   *sptr = '\0';
   CgiLibHtmlDeEntify (dentBuf);

   cptr = dentBuf;
   sptr = StringPtr;
   while (*cptr && *sptr)
   {
      /* cater for WIN32/DOS-style carriage-control */
      if (sptr[0] == '\r' && sptr[1] == '\n') sptr++;
      /* Opera 8.5 (at least) substitutes a space for &#10; in buttons! */
      if (!(*cptr == '\n' && *sptr == ' '))
         if (*cptr != *sptr)
            break;
      cptr++;
      sptr++;
   }
   if (!*cptr && !*sptr)
   {
      if (WatchEnabled)
         WatchThis ("LANG TRUE \'!AZ\' \'!AZ\' \'!AZ\' \'!AZ\'",
                    MsgNamePtr, StringPtr, lptr, dentBuf);
      return (TRUE);
   }
   if (WatchEnabled)
      WatchThis ("LANG FALSE \'!AZ\' \'!AZ\' \'!AZ\' \'!AZ\'",
                 MsgNamePtr, StringPtr, lptr, dentBuf);
   return (FALSE);
}

/*****************************************************************************/
/*
There is a complication in the rendering of HTML entities when in JavaScript
alerts and confirms.  This function converts recognised entities into an octal
equivalent for use in JavaScript alerts.  Returns a pointer to a dynamic string
if escaping is required.

Apparently this is a (perhaps little) known phenomenon:

  Accents in alerts posted 25th November 2003

      Ever needed to put accented characters such as é into a JavaScript alert?
      Its surprisingly problematic. Consider this simple function:

      function accentTest1() {
         alert('M&eacute;nage &agrave; trois.')
      }

      If you invoke the function, you get an alert as expected, but the HTML
      entities are not entified, they get them spelled out as typed in the
      code.

      To get around this problem, you have to use octal-encoded characters
      instead of HTML entities:

      alert('M\351nage \340 trois.')

      PJB has published a handy table for converting between octal, hex and
      HTML entities.

  http://clagnut.com/blog/261/

The 'handy table' can be found at:

  http://www.pjb.com.au/comp/diacritics.html

And is reproduced here in case it disappears:

  US-Kbd    Octal  Hex    HTML           Details

  Alt-"     \242   A2    &cent;      ¢  cent sign
  Alt-#     \243   A3    &pound;     £  Pound sign (British)
  Alt-%     \245   A5    &yen;       ¥  Yen sign (Japanese)

  Alt-<     \274   BC    &frac14;    ¼  fraction one-quarter
  Alt-=     \275   BD    &frac12;    ½  fraction one-half
  Alt->     \276   BE    &frac34;    ¾  fraction three-quarters

  Alt-!     \241   A1    &iexcl;     ¡  upside-down exclamation mark
  Alt-+     \253   AB    &laquo;     «  open chevron-style quotes
  Alt-;     \273   BB    &raquo;     »  close chevron-style quotes
  Alt-?     \277   BF    &iquest;    ¿  upside-down question mark

  Alt-@     \300   C0    &Agrave;    À  capital A, grave accent
  Alt-A     \301   C1    &Aacute;    Á  capital A, acute accent
  Alt-B     \302   C2    &Acirc;     Â  capital A, circumflex accent
  Alt-C     \303   C3    &Atilde;    Ã  capital A, tilde
  Alt-D     \304   C4    &Auml;      Ä  capital A, umlaut mark
  Alt-E     \305   C5    &Aring;     Å  capital A, ring
  Alt-F     \306   C6    &AElig;     Æ  capital AE diphthong
  Alt-G     \307   C7    &Ccedil;    Ç  capital C, cedilla
  Alt-H     \310   C8    &Egrave;    È  capital E, grave accent
  Alt-I     \311   C9    &Eacute;    É  capital E, acute accent
  Alt-J     \312   CA    &Ecirc;     Ê  capital E, circumflex accent
  Alt-K     \313   CB    &Euml;      Ë  capital E, umlaut mark
  Alt-L     \314   CC    &Igrave;    Ì  capital I, grave accent
  Alt-M     \315   CD    &Iacute;    Í  capital I, acute accent
  Alt-N     \316   CE    &Icirc;     Î  capital I, circumflex accent
  Alt-O     \317   CF    &Iuml;      Ï  capital I, umlaut mark
  Alt-P     \320   D0    &ETH;       Ð  capital Eth, Icelandic
  Alt-Q     \321   D1    &Ntilde;    Ñ  capital N, tilde
  Alt-R     \322   D2    &Ograve;    Ò  capital O, grave accent
  Alt-S     \323   D3    &Oacute;    Ó  capital O, acute accent
  Alt-T     \324   D4    &Ocirc;     Ô  capital O, circumflex accent
  Alt-U     \325   D5    &Otilde;    Õ  capital O, tilde
  Alt-V     \326   D6    &Ouml;      Ö  capital O, umlaut mark
  Alt-X     \330   D8    &Oslash;    Ø  capital O, slash
  Alt-Y     \331   D9    &Ugrave;    Ù  capital U, grave accent
  Alt-Z     \332   DA    &Uacute;    Ú  capital U, acute accent
  Alt-[     \333   DB    &Ucirc;     Û  capital U, circumflex accent
  Alt-\     \334   DC    &Uuml;      Ü  capital U, umlaut mark
  Alt-]     \335   DD    &Yacute;    Ý  capital Y, acute accent
  Alt-^     \336   DE    &THORN;     Þ  capital THORN, Icelandic
  Alt-_     \337   DF    &szlig;     ß  sz ligature, German

  Alt-`     \340   E0    &agrave;    à  small a, grave accent
  Alt-a     \341   E1    &aacute;    á  small a, acute accent
  Alt-b     \342   E2    &acirc;     â  small a, circumflex accent
  Alt-c     \343   E3    &atilde;    ã  small a, tilde
  Alt-d     \344   E4    &auml;      ä  small a, umlaut mark
  Alt-e     \345   E5    &aring;     å  small a, ring
  Alt-f     \346   E6    &aelig;     æ  small ae diphthong
  Alt-g     \347   E7    &ccedil;    ç  small c, cedilla
  Alt-h     \350   E8    &egrave;    è  small e, grave accent
  Alt-i     \351   E9    &eacute;    é  small e, acute accent
  Alt-j     \352   EA    &ecirc;     ê  small e, circumflex accent
  Alt-k     \353   EB    &euml;      ë  small e, umlaut mark
  Alt-l     \354   EC    &igrave;    ì  small i, grave accent
  Alt-m     \355   ED    &iacute;    í  small i, acute accent
  Alt-n     \356   EE    &icirc;     î  small i, circumflex accent
  Alt-o     \357   EF    &iuml;      ï  small i, umlaut mark
  Alt-p     \360   F0    &eth;       ð  small eth, Icelandic
  Alt-q     \361   F1    &ntilde;    ñ  small n, tilde
  Alt-r     \362   F2    &ograve;    ò  small o, grave accent
  Alt-s     \363   F3    &oacute;    ó  small o, acute accent
  Alt-t     \364   F4    &ocirc;     ô  small o, circumflex accent
  Alt-u     \365   F5    &otilde;    õ  small o, tilde
  Alt-v     \366   F6    &ouml;      ö  small o, umlaut mark
  Alt-x     \370   F8    &oslash;    ø  small o, slash
  Alt-y     \371   F9    &ugrave;    ù  small u, grave accent
  Alt-z     \372   FA    &uacute;    ú  small u, acute accent
  Alt-{     \373   FB    &ucirc;     û  small u, circumflex accent
  Alt-|     \374   FC    &uuml;      ü  small u, umlaut mark
  Alt-}     \375   FD    &yacute;    ý  small y, acute accent
  Alt-~     \376   FE    &thorn;     þ  small thorn, Icelandic
            \377   FF    &yuml;      ÿ  small y, umlaut mark
*/

char* LangOctalEntity (char *StringPtr)

{
  static struct OctalEntityStruct
  {
     char *octal,
          *entity;
     char eightbit;
  }
  OctalEntity [] = {

     { "\42", "&quot;", '\"' },
     { "\47", "&apos;", '\'' },
     { "\46", "&amp;", '&' },
     { "\74", "&lt;", '<' },
     { "\76", "&gt;", '>' },
     { "\240", "&nbsp;", ' ' },

     { "\242", "&cent;", '¢' },
     { "\243", "&pound;", '£' },
     { "\245", "&yen;", '¥' },

     { "\274", "&frac14;", '¼' },
     { "\275", "&frac12;", '½' },
     { "\276", "&frac34;", '¾' },

     { "\241", "&iexcl;", '¡' },
     { "\253", "&laquo;", '«' },
     { "\273", "&raquo;", '»' },
     { "\277", "&iquest;", '¿' },

     { "\300", "&Agrave;", 'À' },
     { "\301", "&Aacute;", 'Á' },
     { "\302", "&Acirc;", 'Â' },
     { "\303", "&Atilde;", 'Ã' },
     { "\304", "&Auml;", 'Ä' },
     { "\305", "&Aring;", 'Å' },
     { "\306", "&AElig;", 'Æ' },
     { "\307", "&Ccedil;", 'Ç' },
     { "\310", "&Egrave;", 'È' },
     { "\311", "&Eacute;", 'É' },
     { "\312", "&Ecirc;", 'Ê' },
     { "\313", "&Euml;", 'Ë' },
     { "\314", "&Igrave;", 'Ì' },
     { "\315", "&Iacute;", 'Í' },
     { "\316", "&Icirc;", 'Î' },
     { "\317", "&Iuml;", 'Ï' },
     { "\320", "&ETH;", 'Ð' },
     { "\321", "&Ntilde;", 'Ñ' },
     { "\322", "&Ograve;", 'Ò' },
     { "\323", "&Oacute;", 'Ó' },
     { "\324", "&Ocirc;", 'Ô' },
     { "\325", "&Otilde;", 'Õ' },
     { "\326", "&Ouml;", 'Ö' },
     { "\330", "&Oslash;", 'Ø' },
     { "\331", "&Ugrave;", 'Ù' },
     { "\332", "&Uacute;", 'Ú' },
     { "\333", "&Ucirc;", 'Û' },
     { "\334", "&Uuml;", 'Ü' },
     { "\335", "&Yacute;", 'Ý' },
     { "\336", "&THORN;", 'Þ' },
     { "\337", "&szlig;", 'ß' },

     { "\340", "&agrave;", 'à' },
     { "\341", "&aacute;", 'á' },
     { "\342", "&acirc;", 'â' },
     { "\343", "&atilde;", 'ã' },
     { "\344", "&auml;", 'ä' },
     { "\345", "&aring;", 'å' },
     { "\346", "&aelig;", 'æ' },
     { "\347", "&ccedil;", 'ç' },
     { "\350", "&egrave;", 'è' },
     { "\351", "&eacute;", 'é' },
     { "\352", "&ecirc;", 'ê' },
     { "\353", "&euml;", 'ë' },
     { "\354", "&igrave;", 'ì' },
     { "\355", "&iacute;", 'í' },
     { "\356", "&icirc;", 'î' },
     { "\357", "&iuml;", 'ï' },
     { "\360", "&eth;", 'ð' },
     { "\361", "&ntilde;", 'ñ' },
     { "\362", "&ograve;", 'ò' },
     { "\363", "&oacute;", 'ó' },
     { "\364", "&ocirc;", 'ô' },
     { "\365", "&otilde;", 'õ' },
     { "\366", "&ouml;", 'ö' },
     { "\370", "&oslash;", 'ø' },
     { "\371", "&ugrave;", 'ù' },
     { "\372", "&uacute;", 'ú' },
     { "\373", "&ucirc;", 'û' },
     { "\374", "&uuml;", 'ü' },
     { "\375", "&yacute;", 'ý' },
     { "\376", "&thorn;", 'þ' },
     { "\377", "&yuml;", 'ÿ' },
     { NULL, NULL, 0 }
   };

   char  *cptr, *optr, *sptr, *zptr;
   char  OctalBuffer [256];
   struct OctalEntityStruct  *oeptr;

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "LangOctalEntity() |%s|\n", StringPtr);

   /* no need to go to all this trouble if nothing suggestive of an entity */
   if (!strchr (StringPtr, '&')) return (StringPtr);

   zptr = (sptr = OctalBuffer) + sizeof(OctalBuffer)-1;
   cptr = StringPtr;
   while (*cptr && sptr < zptr)
   {
      if (*cptr == '&')
      {
         for (oeptr = (struct OctalEntityStruct*)&OctalEntity;
              oeptr->entity;
              oeptr++)
            if (!strncmp (cptr, oeptr->entity, strlen(oeptr->entity))) break;
         if (oeptr->entity)
         {
            for (optr = oeptr->octal; *optr && sptr < zptr; *sptr++ = *optr++);
            while (*cptr && *cptr != ';') cptr++;
            if (*cptr) cptr++;
         }
         else
         {
            while (*cptr && *cptr != ';' && sptr < zptr) *sptr++ = *cptr++;
            if (*cptr == ';' && sptr < zptr) *sptr++ = *cptr++;
         }
      }
      else
         *sptr++ = *cptr++;
   }
   *sptr = '\0';

   cptr =  CgiLibVeeMemCalloc (sptr-OctalBuffer+1);
   if (!cptr) ErrorExit (vaxc$errno, FI_LI);
   strcpy (cptr, OctalBuffer);

   return (cptr);
}

/*****************************************************************************/