1#!/usr/bin/env python3
   2# ============================================================================
   3#
   4# NAME
   5#
   6#     updateweb.py
   7#
   8# DESCRIPTION
   9#
  10#     Python script which updates my web sites.
  11#
  12#     It does miscellaneous cleanup on my local copy of the web site on disk,
  13#     including updating copyright information, then synchronizes the local
  14#     copy to my remote server web sites using FTP.
  15#
  16# USAGE
  17#
  18#     It's best to use the associated makefile.
  19#     But you can call this Python utility from the command line,
  20#
  21#     $ python updateweb.py          Clean up my local copy, then use it
  22#                                    to update my remote web server site.
  23#                                    Log warnings and errors.
  24#     $ python updateweb.py -v       Same, but log debug messages also.
  25#     $ python updateweb.py -c       Clean up my local copy only.
  26#     $ python updateweb.py -t       Run unit tests only.
  27#     $ python updateweb.py -m       Upload MathJax files (only need to do this once).
  28#
#     We get username and password information from the private YAML settings file named by SERVER_SETTINGS_FILE_NAME.
  30#
  31#     Logs are written to the files,
  32#
  33#         logLocal.txt       Local web site cleanup log.
  34#         logRemote.txt      Remote web server update log.
  35#
  36# AUTHOR
  37#
  38#     Sean E. O'Connor        23 Aug 2007  Version 1.0 released.
  39#
  40# LEGAL
  41#
  42#     updateweb.py Version 7.3 - A Python utility program which maintains my web site.
  43#     Copyright (C) 2007-2025 by Sean Erik O'Connor.  All Rights Reserved.
  44#
  45#     This program is free software: you can redistribute it and/or modify
  46#     it under the terms of the GNU General Public License as published by
  47#     the Free Software Foundation, either version 3 of the License, or
  48#     (at your option) any later version.
  49#
  50#     This program is distributed in the hope that it will be useful,
  51#     but WITHOUT ANY WARRANTY; without even the implied warranty of
  52#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  53#     GNU General Public License for more details.
  54#
  55#     You should have received a copy of the GNU General Public License
  56#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
  57#
  58#     The author's address is seanerikoconnor!AT!gmail!DOT!com
  59#     with !DOT! replaced by . and the !AT! replaced by @
  60#
  61# NOTES
  62#
  63#    DOCUMENTATION
  64#
  65#    Python interpreter:               https://www.python.org/
  66#    Python tutorial and reference:    https://docs.python.org/lib/lib.html
  67#    Python debugger:                  https://docs.python.org/3/library/pdb.html
  68#    Python regular expression howto:  https://docs.python.org/3.7/howto/regex.html
  69#
  70# ============================================================================
  71
  72# ----------------------------------------------------------------------------
  73#  Load Python Packages
  74# ----------------------------------------------------------------------------
  75
  76# OS stuff
  77import sys
  78import os
  79import argparse
  80import subprocess
  81import shutil
  82from pathlib import Path
  83
  84# Regular expressions
  85import re
  86
  87# FTP stuff
  88import ftplib
  89
  90# Date and time
  91import time
  92import stat
  93import datetime
  94
  95# Logging
  96import logging
  97
  98# Unit testing
  99import unittest
 100
 101# Enumerated types (v3.4)
 102from enum import Enum
 103from typing import List, Any
 104
 105# YAML configuration files (a superset of JSON!)
 106import yaml 
 107# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
 108try:
 109    from yaml import CLoader as Loader
 110except ImportError:
 111    from yaml import Loader
 112
 113# Python syntax highlighter.  See https://pygments.org
 114from pygments import highlight
 115from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
 116from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
 117from pygments.formatters import HtmlFormatter
 118
 119
 120# ----------------------------------------------------------------------------
 121#  Custom Top Level Exceptions.
 122# ----------------------------------------------------------------------------
 123
 124class UpdateWebException(Exception):
 125    """Something went wrong at a deep level when searching local files, searching remote files, or trying to sync local and remote, and we could not recover.
       Derive from Exception as recommended by the Python manual."""
 127    pass
 128
 129# ----------------------------------------------------------------------------
 130#  User settings.
 131# ----------------------------------------------------------------------------
 132
 133class TreeWalkSettings(Enum):
 134    """Enum types for how to walk the directory tree."""
 135    BREADTH_FIRST_SEARCH = 1
 136    DEPTH_FIRST_SEARCH = 2
 137
 138class FileType(Enum):
 139    """'Enum' types for properties of directories and files."""
 140    DIRECTORY = 0
 141    FILE = 1
 142    ON_LOCAL_ONLY = 2
 143    ON_REMOTE_ONLY = 3
 144    ON_BOTH_LOCAL_AND_REMOTE = 4
 145
 146class UserSettings:
 147    """Megatons of user selectable settings."""
 148    # Logging control.
 149    LOGFILENAME = ""
 150    VERBOSE = False  # Verbose mode.  Prints out everything.
 151    CLEAN = False  # Clean the local website only.
 152    UNITTEST = False  # Run a unit test of a function.
 153    MATHJAX = False  # Process and upload MathJax files to server.
 154
 155    # When diving into the MathJax directory, web walking the deep directories
 156    # may exceed Python's default recursion limit of 1000.
 157    RECURSION_DEPTH = 5000
 158    sys.setrecursionlimit(RECURSION_DEPTH)
 159
 160    # Fields in the file information (file_info) structure.
 161    # For example, file_info = 
 162    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
 163    #      1,                                           -- Enum type: Is it a file? dir? on local? on remote? on both?
 164    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a datetime class.
 165    #      4675]                                        -- File size in bytes.
 166    FILE_NAME = 0
 167    FILE_TYPE = 1
 168    FILE_DATE_TIME = 2
 169    FILE_SIZE = 3
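    # So, for example, file_info[UserSettings.FILE_NAME] picks out the file name and
    # file_info[UserSettings.FILE_SIZE] picks out the size in bytes of the example above.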
 170
 171    # Server settings.
 172    SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
 173    SERVER_NAME = None
 174    USER_NAME = None
 175    PASSWORD_NAME = None
 176    FTP_ROOT_NAME = None
 177    FILE_SIZE_LIMIT_NAME = None
 178
 179    # Map month names onto numbers.
 180    monthToNumber = {
 181        'Jan': 1,
 182        'Feb': 2,
 183        'Mar': 3,
 184        'Apr': 4,
 185        'May': 5,
 186        'Jun': 6,
 187        'Jul': 7,
 188        'Aug': 8,
 189        'Sep': 9,
 190        'Oct': 10,
 191        'Nov': 11,
 192        'Dec': 12}
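    # Example lookup (illustrative only):  UserSettings.monthToNumber['Sep']  -->  9.
    # This is how a month name pulled out of an ftp listing can be turned into a month number.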
 193
 194    # List of directories to skip over when processing or uploading the web page.
    # Some are private, but most are directories of temporary files.
 196    # They will be listed as WARNING in the log.
 197    # Examples:
 198    #     My private admin settings directory.
 199    #     Git or SVN local admin directories.
    #     Compile build directories from XCode.
 201    #     PyCharm build directories.
 202    #     Python cache directories.
 203    #     Jupyter checkpoint directories.
 204    #     XCode temporary file crap.
 205    DIR_TO_SKIP = "private|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|__pycache__|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"
 206
 207    # List of files to skip when processing or uploading to the web page.
 208    # They will be listed as WARNING in the log.
 209    # Examples:
 210    #     MathJax yml file.
 211    #     .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
 212    FILE_TO_SKIP = ".travis.yml|.svnignore|.htaccess"
 213
 214    # Suffixes for temporary files which will be deleted during the cleanup
 215    # phase.
 216    TEMP_FILE_SUFFIXES = r"""        # Use Python raw strings.
 217        \.                           # Match the dot in the file name.
 218                                     # Now begin matching the file name suffix.
 219                                     # (?: non-capturing match for the regex inside the parentheses,
 220                                     #   i.e. matching string cannot be retrieved later.
 221                                     # Now match any of the following file extensions:
 222        (?: o   | obj | lib |        #     Object files generated by C, C++, etc compilers
 223                              pyc |  #     Object file generated by the Python compiler
 224                  ilk | pdb | sup |  #     Temp files from VC++ compiler
 225            idb | ncb | opt | plg |  #     Temp files from VC++ compiler
 226            sbr | bsc | map | bce |  #     Temp files from VC++ compiler
 227            res | aps | dep | db  |  #     Temp files from VC++ compiler
 228                              jbf |  #     Paintshop Pro
 229                      class | jar |  #     Java compiler
 230                              fas |  #     CLISP compiler
 231                        swp | swo |  #     Vim editor
                        toc | aux |  #     TeX auxiliary files (not .synctex.gz or .log)
 233          DS_Store  | _\.DS_Store |  #     macOS finder folder settings.
 234                       _\.Trashes |  #     macOS recycle bin
 235        gdb_history)                 #     GDB history
 236        $                            #     Now we should see only the end of line.
 237        """
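    # A quick illustration of what the pattern above is meant to catch once it has been compiled with
    # re.VERBOSE | re.IGNORECASE in precompile_regular_expressions() below (hypothetical file names):
    #     "module.pyc", "main.obj", "paper.aux"  --  match, so they are deleted during cleanup.
    #     "README.txt", "photo.png"              --  no match, so they are kept.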
 238
 239    # Special case:  Vim temporary files contain a twiddle anywhere in the
 240    # name.
 241    VIM_TEMP_FILE_EXT = "~"
 242
 243    # Suffixes for temporary directories which should be deleted during the
 244    # cleanup phase.
 245    TEMP_DIR_SUFFIX = r"""           # Use Python raw strings.
 246        (?: Debug | Release |        # C++ compiler
 247           ipch   | \.vs    |        # Temp directories from VC++ compiler
 248        \.Trashes | \.Trash)         # macOS recycle bin
 249        $
 250        """
 251
 252    # File extension for an internally created temporary file.
 253    TEMP_FILE_EXT = ".new"
 254
 255    # Identify source file types.
 256    HYPERTEXT_FILE_PATTERN = r"""  # Use Python raw strings.
 257        (\.                        # Match the filename suffix after the .
 258            (?: html | htm |       # HTML hypertext
 259                css)               # CSS style sheet
 260        $)                         # End of line.
 261    """
 262
 263    SOURCE_FILE_PATTERN = r"""      # Use Python raw strings.
 264        (?: makefile$ |             # Any file called makefile is a source file.
 265                                    # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
          \.vimrc$ |                # Vim script
          (\.bashrc$ |              # Bash configuration files.
           \.bash_profile$ |
           \.bash_logout$)
          |
          (\.gitignore$ |           # Git configuration files.
           \.gitignore_global$ |
           \.gitconfig$)
 274          |
 275          (\.                       # Match the filename suffix after the .
 276                                    # Now match any of these suffixes:
 277             (?: 
 278                  c | cpp | h | hpp |   #     C++ and C
 279                  js |                  #     Javascript
 280                  py |                  #     Python
 281                  lsp |                 #     LISP
 282                  ipynb |               #     Jupyter notebook
 283                  m  |                  #     MATLAB
 284                  FOR | for | f |       #     FORTRAN
 285                  yaml |                #     YAML = JSON superset
 286                  tex |                 #     LaTeX
 287                  txt | dat |           #     Data files
 288                  sh)                   #     Bash
 289             $)                         # End of line.
 290         )
 291         """
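    # Once compiled in verbose mode, this pattern is intended to match source file names such as
    # "makefile", ".vimrc", "hello.cpp" or "analysis.ipynb", but not "photo.png".
    # Plain web page files are matched separately by HYPERTEXT_FILE_PATTERN above.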
 292
 293    # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
 294    SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
 295        (?: ^life\.html$          | # We want a listing of this particular HTML file.
 296            ^index\.html$         | # I want to list my top level HTML file.  (There is only one file with this name at the top level web directory.)
 297            ^webPageDesign\.html$ | # and also this HTML example file, but no others.
 298            ^StyleSheet\.css$ )     # I want to list my style sheet.
 299        """
 300
 301    # Files for which we want to generate a syntax highlighted source code listing.
 302    # Uses an f-string combined with a raw-string.
 303    FILE_TO_HIGHLIGHT_PATTERN = fr"""
 304        (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} | 
 305            {SOURCE_FILE_PATTERN} )
 306        """
 307
 308    # Update my email address.
    # This is tricky:  Prevent matching and updating the name within this
 310    # Python source file by using the character class brackets.
 311    OLD_EMAIL_ADDRESS = r"""
 312        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
 313        """
 314    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"
 315
 316    # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.  
 317    # Read patterns and strings from the updateweb.yaml file.
 318    STRING_REPLACEMENT_LIST = []
 319    # Pairs of test strings and their correct match/replacements.
 320    STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []
 321
 322    # Match a copyright line like this:
 323    #     Copyright (C) 1999-2025 by Sean Erik O&#39;Connor.  All Rights Reserved.
 324    # Extract the copyright symbol which can be ascii (C) or HTML &copy; and extract the old year.
 325    TWO_DIGIT_YEAR_FORMAT = "%02d"
 326    COPYRIGHT_LINE = r"""
 327        Copyright                       # Copyright.
 328        \s+                             # One or more spaces.
 329        (?P<symbol> \(C\) | &copy;)     # Match and extract the copyright symbol.
 330        \D+                             # Any non-digits.
 331        (?P<old_year>[0-9]+)            # Match and extract the old copyright year, place it into variable 'old_year'
 332        -                               # hyphen
 333        ([0-9]+)                        # New copyright year.
 334        \s+                             # One or more spaces.
 335        by\s+Sean\sErik                 # Start of my name.  This way we don't rewrite somebody else's copyright notice.
 336        """
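    # Illustrative use, mirroring the unit test further below:  after the pattern is compiled, the named
    # groups are substituted back into a replacement string to bump the year range up to the current year:
    #     COPYRIGHT_LINE.sub(r"Copyright \g<symbol> \g<old_year>-2025 by Sean Erik", line)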
 337
 338    # Match a line containing the words,
 339    #    last updated YY
 340    # and extract the two digit year YY.
 341    LAST_UPDATED_LINE = r"""
 342        last\s+         # Match the words "last updated"
 343        updated\s+
 344        \d+             # Day number
        \s+             # One or more blanks or tabs
 346        [A-Za-z]+       # Month
 347        \s+             # One or more blanks or tabs
 348        (?P<year>\d+)   # Two digit year.  Place it into the variable 'year'
 349        """
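    # For example, a line containing "last updated 01 Jan 25" places "25" into the match group 'year'.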
 350
 351    # Web server root directory.
 352    DEFAULT_ROOT_DIR = "/"
 353
 354    # The ftp listing occasionally shows a date newer than the actual date. 
    # On my server, it could be 6 months newer when we are near New Year's Day.  Typically the server file time is only 1 or 2 minutes newer.
 356    # But if the remote file time is much newer, it might be an old file with a bad date/time.  
 357    # Upload the file to be safe.
 358    # How to see the time differences from the log if they are large:
 359    #     egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
 360    #     Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
 361    # How to see the time differences from the log if they are small and we wait and NOT upload:
 362    #    egrep -o "Remote file.*is newer.*days" logRemote.txt
 363    #    Remote file error404.html is newer by    102.0 seconds =      1.7 minutes =      0.0 hours =      0.0 days
 364    #    Remote file index.html is newer by    113.0 seconds =      1.9 minutes =      0.0 hours =      0.0 days
 365    MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0
 366
 367    # Upload only if we are newer by more than a few minutes.  Allows for a little slop in time stamps on server or host.
 368    MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0
 369
    # An ftp listing line should be at least this many characters, or we'll
    # suspect an error.
 372    MIN_FTP_LINE_LENGTH = 7
 373
 374    # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
 375    # ftp listings are generally similar to UNIX ls -l listings.
 376    #
 377    # Some examples:
 378    #
 379    # (1) Freeservers ftp listing,
 380    #
 381    #          0        1   2                3           4    5   6   7      8
 382    #     drwxr-xr-x    3 1000             1000         4096 Nov 18  2006 Electronics
 383    #     -rw-r--r--    1 1000             1000        21984 Jun  4 03:46 StyleSheet.css
 384    #     -rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html
 385    #
 386    # (2) atspace ftp listing,
 387    #
 388    #     drwxr-xr-x    3  seanerikoconnor vusers         49 Apr  7  2006 Electronics
 389    #     -rw-r--r--    1  seanerikoconnor vusers      21984 Jun  4 04:03 StyleSheet.css
 390    #
 391    FTP_LISTING = r"""
 392        [drwx-]+            # Unix type file mode.
 393        \s+                 # One or more blanks or tabs.
 394        \d+                 # Number of links.
 395        \s+
 396        \w+                 # Owner.
 397        \s+
 398        \w+                 # Group.
 399        \s+
 400        (?P<bytes> \d+)     # File size in bytes, placed into the variable 'bytes'.
 401        \s+
 402        (?P<mon> \w+)       # Month modified, placed into the variable 'mon'.
 403        \s+
 404        (?P<day> \d+)       # Day modified, placed into the variable 'day'.
 405        \s+
 406        (
 407            (?P<hour> \d+)  # Hour modified, placed into the variable 'hour'.
 408            :
 409            (?P<min> \d+)   # Minute modified, placed into the variable 'min'.
 410            |
 411            (?P<year> \d+)  # If hours and minutes are absent (happens when year is not the current year),
 412                            # extract the year instead.
 413        )
 414        \s+
 415        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)    # Path and file name containing letters, numbers,
 416                                                    # and funny characters.  We must escape some of
 417                                                    # these characters with a backslash, \.
 418        """
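    # Illustrative use on a sample listing line like the ones above (after the pattern is compiled in verbose mode):
    #     m = FTP_LISTING.search("-rw-r--r--    1 1000   1000   21984 Jun  4 03:46 StyleSheet.css")
    #     m.group('bytes'), m.group('mon'), m.group('day'), m.group('hour'), m.group('min'), m.group('filename')
    #         -->  ('21984', 'Jun', '4', '03', '46', 'StyleSheet.css')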
 419
 420    # HTML header up to the style sheet.
 421    BASIC_HTML_BEGIN = \
 422        """
 423        <!DOCTYPE html>
 424        <html lang="en-US">  <!-- Set language of this page to USA English. -->
 425        
 426        <head>
 427            <!-- This page uses Unicode characters. -->
 428            <meta charset="utf-8">
 429        
 430            <!-- Set viewport to actual device width.  Any other settings makes the web page initially appear zoomed-in on mobile devices. -->
 431            <meta name="viewport" content="width=device-width, initial-scale=1">
 432        
 433            <!-- Title appears in the web browser tab for this page.  The browser also uses it to bookmark this page. -->
 434            <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>
 435        
 436            <!-- Search engines will search using words in this description.  They will also display title in their search results. -->
 437            <meta name="description" content="Syntax Colored Source Code Listing">
 438        
 439            <!-- Some content management software uses the author's name. -->
 440            <meta name="author" content="Sean Erik O'Connor">
 441        
 442            <meta name="copyright" content="Copyright (C) 1986-2025 by Sean Erik O'Connor.  All Rights Reserved.">   
 443        
 444            <!-- Begin style sheet insertion -->
 445            <style>
 446                /* Default settings for all my main web pages. */
 447                body
 448                {
 449                    /* A wide sans-serif font is more readable on the web. */
 450                    font-family:            Verdana, Geneva, "Trebuchet MS", sans-serif ;
 451        
 452                    /* Set the body font size a little smaller than the user's default browser setting. */
 453                    font-size:              0.8em ; 
 454        
 455                    /* Black text is easier to read. */
 456                    color:                  black ;
 457        
 458                    /*  More vertical space between lines for more pleasant reading.  Use a unitless font height multiplier.  
                        Length and percentage values can give scrunched text due to poor inheritance behavior. */
 460                    line-height:            1.7 ;
 461                }
 462        
                /* Now prepare to add the syntax coloring style sheet from Pygments. */
 464        """
 465
 466    # After the style sheet and up to the start of the article in the body.
 467    BASIC_HTML_MIDDLE = \
 468        """
 469            </style>
 470        </head>
 471        
 472        <body>
 473            <article class="content">
 474        """
 475
 476    # After the source code listing, finish the article, body and html document.
 477    BASIC_HTML_END = \
 478        """
 479            </article>
 480        </body>
 481        
 482        </html>
 483        """
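    # A rough sketch of how these three fragments are meant to fit around a Pygments-highlighted listing
    # (illustrative only;  'source_code' here is a placeholder string, not a variable defined in this class):
    #     page = (BASIC_HTML_BEGIN
    #             + HtmlFormatter().get_style_defs()                        # CSS rules for the syntax coloring.
    #             + BASIC_HTML_MIDDLE
    #             + highlight(source_code, PythonLexer(), HtmlFormatter())  # The colored listing itself.
    #             + BASIC_HTML_END)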
 484
 485    def __init__(self):
 486        """Set up the user settings."""
 487
 488        self.local_root_dir = ""
 489
 490        # Import the user settings from the parameter file.
 491        self.get_local_root_dir()
 492        self.get_server_settings()
 493
 494        self.precompile_regular_expressions()
 495
 496    def get_server_settings(self):
 497        """
 498        Read web account private settings from a secret offline parameter file.
 499        These also hold patterns to match and replace in all of our source pages.
 500        """
 501
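        # A minimal sketch of the YAML layout this method expects:  two documents separated by "---".
        # The key names below come from the parsing code that follows;  the values are hypothetical placeholders.
        #
        #     ftp_server_name: ftp.example.com
        #     ftp_user_name: myusername
        #     ftp_password: mypassword
        #     remote_directory: /
        #     file_size_limit_Kb: 50000
        #     ---
        #     pattern_match_replacement_string_list:
        #         - pattern: OldProductName           # A verbose regular expression to search for.
        #           replacement_string: NewProductName
        #     test_verify_string_list:
        #         - test_string: Download OldProductName today.
        #           verify_string: Download NewProductName today.
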
 502        # Private file which contains my account settings.
 503        settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
 504        # Recommended by
 505        #  https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
 506        try:
 507            stream = open(settings_file_name, "r")
 508        except OSError as detail:
 509            logging.error(f"Cannot open the YAML file {settings_file_name:s}.  Unable to read the settings because: {str(detail):s}")
 510            # Rethrow the exception higher.
 511            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ") from detail
 512        # Read all the YAML documents in the file.
 513        yaml_contents = yaml.load_all(stream, Loader)
        yaml_document_list: list[Any] = []
        for yaml_doc in yaml_contents:
            yaml_document_list.append(yaml_doc)
        # All the documents are parsed now, so close the settings file.
        stream.close()
        num_yaml_docs = len(yaml_document_list)
 518        if num_yaml_docs != 2:
 519            logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file.  Aborting...")
 520            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ")
 521
 522        # Load all the server settings.
 523        self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
 524        self.USER_NAME = yaml_document_list[0]['ftp_user_name']
 525        self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
 526        self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
 527        self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])
 528
 529        # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
 530        self.STRING_REPLACEMENT_LIST = []
 531        pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
 532        for pat_rep in pat_rep_yaml_list:
 533            # Fetch the regular expression and compile it for speed.
 534            verbose_regex = pat_rep['pattern']
 535            pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
 536            # Since we use raw strings, we need to strip off leading and trailing whitespace.
            replacement_string = pat_rep['replacement_string'].strip()
 538            self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])
 539
 540        # Load the test and verify strings.
 541        test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
 542        for test_verify_string in test_verify_strings_list:
            test_string = test_verify_string['test_string'].strip()
            verify_string = test_verify_string['verify_string'].strip()
 545            self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string,verify_string])
 546
 547        print("  ...done!", flush=True)
 548        return
 549
 550    def get_local_root_dir(self):
 551        """Get the local website root directory on this platform."""
 552
 553        # Each platform has a definite directory for the web page.
 554        local_web_dir_path = "/Desktop/Sean/WebSite"
 555
 556        if sys.platform.startswith('darwin'):
 557            self.local_root_dir = str(Path.home()) + local_web_dir_path
 558        # My Cyperpower PC running Ubuntu Linux.
 559        elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
 560            self.local_root_dir = str(Path.home()) + local_web_dir_path
 561        return
 562
 563    def precompile_regular_expressions(self):
 564        """For speed precompile the regular expression search patterns."""
 565        self.COPYRIGHT_LINE            = re.compile(self.COPYRIGHT_LINE,            re.VERBOSE | re.IGNORECASE)
 566        self.FTP_LISTING               = re.compile(self.FTP_LISTING,               re.VERBOSE | re.IGNORECASE)
 567        self.TEMP_FILE_SUFFIXES        = re.compile(self.TEMP_FILE_SUFFIXES,        re.VERBOSE | re.IGNORECASE)
 568        self.TEMP_DIR_SUFFIX           = re.compile(self.TEMP_DIR_SUFFIX,           re.VERBOSE)
 569        self.SOURCE_FILE_PATTERN       = re.compile(self.SOURCE_FILE_PATTERN,       re.VERBOSE)
 570        self.HYPERTEXT_FILE_PATTERN    = re.compile(self.HYPERTEXT_FILE_PATTERN,    re.VERBOSE)
 571        self.OLD_EMAIL_ADDRESS         = re.compile(self.OLD_EMAIL_ADDRESS,         re.VERBOSE | re.IGNORECASE)
 572        self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
 573        self.LAST_UPDATED_LINE         = re.compile(self.LAST_UPDATED_LINE,         re.VERBOSE | re.IGNORECASE)
 574
 575# ----------------------------------------------------------------------------
 576#  Unit test individual functions.
 577# ----------------------------------------------------------------------------
 578
 579class UnitTest(unittest.TestCase):
 580    """Initialize the UnitTest class."""
 581    def setUp(self):
 582        self.user_settings = UserSettings()
 583        self.user_settings.get_local_root_dir()
 584
 585    def tearDown(self):
 586        """Clean up the UnitTest class."""
 587        self.user_settings = None
 588
 589    def test_copyright_updating(self):
 590        """Test copyright line updating to the current year."""
 591        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
 592        line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2025 by Sean Erik O'Connor"
 593        line_after_update_actual = "Copyright (C) 1999-2025 by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2025 by Sean Erik O'Connor"
 594        pat = self.user_settings.COPYRIGHT_LINE
 595        match = pat.search(line_before_update)
 596
 597        if match:
 598            old_year = int(match.group('old_year'))
            # Same as calling WebSite.get_current_year():
 600            current_year = int(time.gmtime()[0])
 601            if old_year < current_year:
 602                # We matched and extracted the old copyright symbol into the variable
 603                # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
 604                # We now insert it back by placing the special syntax
 605                # \g<symbol> into the replacement string.
 606                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(current_year) + " by Sean Erik"
 607                line_after_update_computed = pat.sub(new_copyright, line_before_update)
 608                self.assertEqual(
 609                    line_after_update_actual,
 610                    line_after_update_computed,
 611                    f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")
 612            else:
 613                print( "old_year >= current_year" )
 614                self.fail()
 615        else:
 616            print( "no match for copyright pattern" )
 617            self.fail()
 618
 619    def test_extract_filename_from_ftp_listing(self):
 620        """Test parsing an FTP listing."""
 621        ftp_line = "-rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html"
 622        extracted_file_name = "allclasses-frame.html"
 623        pat = self.user_settings.FTP_LISTING
 624        match = pat.search(ftp_line)
 625        if match:
 626            filename = match.group('filename')
 627            self.assertEqual(
 628                filename,
 629                extracted_file_name,
 630                f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
 631        else:
 632            self.fail()
 633
 634    def test_get_file_time_and_date(self):
 635        """Test getting a file time and date."""
 636        # Point to an old file.
 637        file_name = "./Images/home.png"
 638        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 639        # Get the UTC time.
 640        file_epoch_time = os.path.getmtime(full_file_name)
 641        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 642        # Create a datetime object for the file.
 643        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 644        # Check if the file time matches what we would see if we did ls -l <file_name>
 645        computed = f"file {file_name:s} datetime {d.ctime():s}"
 646        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
 647        self.assertEqual(computed, actual)
 648
 649    def test_set_file_time_and_date(self):
 650        """Test setting a file time and date."""
 651        file_name = "./Images/home.png"
 652        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 653        # Create a temporary file in the same directory.
 654        temp_file_name = "temporal.tmp"
        full_temp_file_name = self.user_settings.local_root_dir + '/' + temp_file_name
 656        try:
 657            with open(full_temp_file_name, 'w') as fp:
 658                fp.write("The End of Eternity")
 659        except OSError as detail:
 660            logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s}  Aborting...")
 661            raise UpdateWebException("Failed the unit test for setting time and date of a file.  See the log file for details.  Aborting...") from detail
 662        # Get the old file time.  Set the temporary file to the same time.
 663        file_stat = os.stat(full_file_name)
 664        os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
 665        # What is the temporary file's time now?
 666        file_epoch_time = os.path.getmtime(full_temp_file_name)
 667        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 668        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 669        # Is the temporary file time set properly?
 670        computed = f"file {file_name:s} datetime {d.ctime():s}"
 671        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
 672        self.assertEqual(computed, actual)
 673        os.remove(full_temp_file_name)
 674
 675    def test_difference_of_time_and_date(self):
 676        """Test a date difference calculation."""
 677        file_name = "./Images/home.png"
 678        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 679        # Get the UTC time.
 680        file_epoch_time = os.path.getmtime(full_file_name)
 681        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 682        # Create a datetime object for the file.
 683        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 684        # Slightly change the date and time by adding 1 minute.
 685        d2 = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]+1)  # year, month, day, hour, minute, second
 686        time_delta = d2 - d
 687        seconds_different = time_delta.total_seconds()
 688        minutes_different = seconds_different / 60.0
 689        hours_different = minutes_different / 60.0
 690        days_different = hours_different / 24.0
 691        computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
 692        actual = "difference  0.00001 days,  0.00028 hours  0.01667 minutes,  1.00000 seconds"
 693        self.assertEqual(computed, actual)
 694
 695    def test_pattern_match_dir_to_skip(self):
 696        """Test if skipping certain named directories is recoginizing the dir names."""
 697        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
 698        pat = re.compile(self.user_settings.DIR_TO_SKIP)
 699        if pat.search(dir_skip):
 700            self.assertTrue(True)
 701        else:
 702            self.assertTrue(False)
 703
 704    def test_file_name_to_syntax_highlight(self):
 705        """Test if syntax highlighting recognizes file names to highlight."""
 706        file_name1 = "Computer/hello.lsp"
 707        file_name2 = "Computer/life.html"
 708        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
 709        if p.search(Path(file_name1).name) and p.search(Path(file_name2).name):
 710            self.assertTrue(True)
 711        else:
 712            self.assertTrue(False)
 713
 714    def test_user_settings(self):
 715        """Test whether user settings are correctly initialized."""
 716        computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
 717        actual = "File size limit = 50000 K"
 718        self.assertEqual(computed, actual, "File size limit settings are incorrect.")
 719
 720    def test_check_replace_substring(self,debug=True):
 721        """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
 722           For troubleshooting, turn on debug.
 723        """
 724        test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
 725        # Iterate over all test strings.
 726        for pair in test_verify_pairs:
 727            [test_string, verify_string] = pair
 728            if debug:
 729                print( f">>>>>>> next test string   = {test_string}")
 730                print( f">>>>>>> next verify string = {verify_string}")
 731            # Iterate over all patterns and replacements.
 732            for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
 733                [pat, rep_string] = match_replace_tuple
                if debug:
                    print( f"\t-------> next pattern = {pat}")
                    print( f"\t-------> next replacement = {rep_string}")
 736                match = pat.search(test_string)
 737                # The pattern match succeeds.
 738                if match:
 739                    try:
 740                        sub = pat.sub(rep_string, test_string)
 741                        # String replacement succeeds for this pattern/replace pair iteration.
 742                        if debug:
 743                            print( f"\t\t.......> match and replace: {test_string} ---> {sub}")
 744                        test_string = sub
 745                    except IndexError as detail:
 746                        print(f"\t\t.......> Caught an exception: {str(detail):s}.  Replacement failed.")
 747                        if debug:
 748                            self.assertTrue(False)
 749                elif debug:
 750                    print( f"\t\t.......> no match for pattern = {pat} in test string = {test_string}")
 751                # No match, so go on to the next pattern and don't change test_string.
 752            # Done with all pattern/replace on test string.
 753            # Check this test string in the list.
 754            self.assertEqual(test_string, verify_string, f"\ntest_string = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
 755            if debug:
 756                print( f"\t******* DONE with all pattern matches and replacements on this test/verify string pair.\n" )
 757
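    def test_append_root_dir(self):
        """Test joining the web server root directory to a relative path name.
           An extra illustrative test of the path logic documented in WebSite.append_root_dir();
           it is pure string manipulation and touches no files."""
        web_site = WebSite(self.user_settings)
        # The default root "/" is a special case:  no extra separator is inserted.
        self.assertEqual(web_site.append_root_dir("/", "Art/foo.txt"), "/Art/foo.txt")
        # Any other root directory gets a "/" separator before the relative path.
        self.assertEqual(web_site.append_root_dir("/Sean", "Art/foo.txt"), "/Sean/Art/foo.txt")
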
 758# ----------------------------------------------------------------------------
 759#  Command line options.
 760# ----------------------------------------------------------------------------
 761
 762class CommandLineSettings(object):
 763    """Get the command line options."""
 764
 765    def __init__(self, user_settings, raw_args=None):
 766        """Get command line options"""
 767        command_line_parser = argparse.ArgumentParser(
 768            description="updateweb options")
 769
 770        # Log all changes, not just warnings and errors.
 771        command_line_parser.add_argument(
 772            "-v",
 773            "--verbose",
 774            help="Turn on verbose mode to log everything",
 775            action="store_true")
 776
 777        # Clean up the local website only.
 778        command_line_parser.add_argument(
 779            "-c",
 780            "--clean",
 781            help="Do a cleanup on the local web site only.",
 782            action="store_true")
 783
        # Also upload the MathJax directory.
 785        command_line_parser.add_argument(
 786            "-m",
 787            "--mathjax",
 788            help="""ALSO upload mathjax directory.\
 789            Do this if you have a new version of MathJax or if you've reset your server and deleted the /mathjax remote directory.\
 790            You'll need to do git restore on any altered files in the local branch and git clean -f to remove any newly created files.\
 791            NOTE:  If you did reset your server and delete all files, run the command    find . -name '*.*' -exec touch {} \\;    from the web page root directory.\
 792            Also run   find . -name '*' -exec touch {} \\;    This will ensure accurate times on the server.""",
 793            action="store_true")
 794
 795        # Run unit tests only.
 796        command_line_parser.add_argument("-t", "--test",
 797                                         help="Run unit tests.",
 798                                         action="store_true")
 799
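        # Parse the command line.  When raw_args is None, argparse falls back to sys.argv as usual;
        # passing an explicit list instead is handy for driving the options from code, e.g. (hypothetical call):
        #     CommandLineSettings(user_settings, raw_args=["-c", "-v"])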
 800        args = command_line_parser.parse_args(raw_args)
 801
 802        if args.verbose:
 803            user_settings.VERBOSE = True
 804        if args.clean:
 805            user_settings.CLEAN = True
 806        if args.test:
 807            user_settings.UNITTEST = True
 808        if args.mathjax:
 809            user_settings.MATHJAX = True
 810
 811# ----------------------------------------------------------------------------
 812#  Base class which describes my web site overall.
 813# ----------------------------------------------------------------------------
 814
 815class WebSite(object):
 816    """
 817    Abstract class used for analyzing both local and remote (ftp server) websites.
 818    Contains the web-walking functions which traverse the directory structures and files.
    These are overridden in the subclasses with specialized methods:  one set walks a local disk directory with ls-style listings, the other walks a remote directory with FTP commands.
 820    Child classes may define additional functions which only they need.
 821    """
 822
 823    def __init__(self, settings):
 824        """Set up root directories"""
 825
 826        # Import the user settings.
 827        self.user_settings = settings
 828
 829        # Queue keeps track of directories not yet processed.
 830        self.queue = []
 831
 832        # List of all directories traversed.
 833        self.directories = []
 834
 835        # List of files traversed, with file information.
 836        self.files = []
 837
 838        # Find out the root directory and go there.
 839        self.root_dir = self.get_root_dir()
 840        self.go_to_root_dir(self.root_dir)
 841
    # The @staticmethod decorator below makes get_current_year a static method:  there is no self first argument, and you can call it without creating an
    # instance of this class.  Call it from anywhere, inside or outside the class, using WebSite.get_current_year().  (You could just create a global function instead.)
 844    @staticmethod
 845    def get_current_year():
 846        """Get the current year."""
 847        return int(time.gmtime()[0])
 848
 849    @staticmethod
 850    def get_current_two_digit_year():
 851        """Get the last two digits of the current year."""
 852        return WebSite.get_current_year() % 100
 853
 854    @staticmethod
 855    def is_file_info_type(file_info):
 856        """Check if we have a file information structure or merely a simple file name."""
 857        try:
 858            if isinstance(file_info, list):
 859                return True
 860            elif isinstance(file_info, str):
 861                return False
 862            else:
 863                logging.error("is_file_info_type found a bad type.  Aborting...")
 864                raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ")
 865        except TypeError as detail:
 866            logging.error(f"is_file_info_type found a bad type {str(detail):s}.  Aborting...")
 867            raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ") from detail
 868
 869    def get_root_dir(self):
 870        """Subclass:  Put code here to get the root directory"""
 871        return ""
 872
 873    def go_to_root_dir(self, root_dir):
 874        """Subclass:  Put code here to go to the root directory"""
        pass  # Python's do-nothing statement.
 876
 877    def one_level_down(self, d):
 878        """Subclass:  Fill in with a method which returns a list of the
        directories and files immediately beneath the directory d."""
 880        return [], []
 881
 882    def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
 883        """Walk a directory in either depth first or breadth first order.  BFS is the default."""
 884
 885        # Get all subfiles and subdirectories off this node.
 886        subdirectories, subfiles = self.one_level_down(d)
 887
 888        # Add all the subfiles in order.
 889        for f in subfiles:
 890
 891            name = self.strip_root(f)
 892            logging.debug(f"Webwalking:  Adding file {name[self.user_settings.FILE_NAME]:s} to list.")
 893
 894            # Some files are private so skip them from consideration.
 895            pat = re.compile(self.user_settings.FILE_TO_SKIP)
 896
 897            if pat.search(name[self.user_settings.FILE_NAME]):
 898                logging.warning(
 899                    f"Webwalking:  Skipping private file {name[self.user_settings.FILE_NAME]:s}")
 900            # Don't upload the log file due to file locking problems.
 901            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
 902                logging.debug(f"Webwalking:  Skipping log file {name[self.user_settings.FILE_NAME]:s}")
 903            # File size limit on some servers.
 904            else:
 905                self.files.append(name)
 906
 907        # Queue up the subdirectories.
 908        for d in subdirectories:
 909            # Some directories are private such as .git or just temporary file
 910            # caches so skip them from consideration.
 911            pat = re.compile(self.user_settings.DIR_TO_SKIP)
 912            if pat.search(d):
 913                logging.warning(f"Webwalking:  Skipping private dir {d:s}")
 914            else:
 915                logging.debug(f"Webwalking:  Pushing dir {d:s} on the queue.")
 916                self.queue.append(d)
 917
 918        # Search through the directories.
 919        while len(self.queue) > 0:
 920            # For breadth first search, remove from beginning of queue.
 921            if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
 922                d = self.queue.pop(0)
 923
 924            # For depth first search, remove from end of queue.
 925            elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
 926                d = self.queue.pop()
 927            else:
 928                d = self.queue.pop(0)
 929
 930            name = self.strip_root(d)
 931            logging.debug(f"Webwalking:  Adding relative directory {name:s} to list, full path = {d:s}.")
 932            self.directories.append(name)
 933
 934            self.walk(d)
 935
 936    def strip_root(self, file_info):
 937        """Return a path, but strip off the root directory"""
 938
 939        root = self.root_dir
 940
 941        # Extract the file name.
 942        if self.is_file_info_type(file_info):
 943            name = file_info[self.user_settings.FILE_NAME]
 944        else:
 945            name = file_info
 946
 947        # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
 948        # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
 949        # Art/foo.txt
 950        lenroot = len(root)
 951        if root == self.user_settings.DEFAULT_ROOT_DIR:
 952            pass
 953        else:
 954            lenroot = lenroot + 1
 955
 956        stripped_path = name[lenroot:]
 957
 958        if self.is_file_info_type(file_info):
 959            # Update the file name only.
 960            return [stripped_path,
 961                    file_info[self.user_settings.FILE_TYPE],
 962                    file_info[self.user_settings.FILE_DATE_TIME],
 963                    file_info[self.user_settings.FILE_SIZE]]
 964        else:
 965            return stripped_path
 966
 967    def append_root_dir(self, root_dir, name):
 968        """Append the root directory to a path"""
 969
 970        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
 971        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
 972        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
 973            return root_dir + name
 974        else:
 975            return root_dir + "/" + name
 976
 977    def scan(self):
 978        """Scan the directory tree recursively from the root"""
 979        logging.debug(f"Webwalking:  Beginning recursive directory scan from root directory {self.root_dir:s}")
 980        self.walk(self.root_dir)
 981
 982    def modtime(self, f):
 983        """Subclass:  Get file modification time"""
 984        pass
 985
 986    def finish(self):
 987        """Quit web site"""
 988        logging.debug(f"Finished with WebSite object of class {type(self)}")
 989        pass
 990
 991# ----------------------------------------------------------------------------
 992#  Subclass which knows about the local web site on disk.
 993# ----------------------------------------------------------------------------
 994
 995class LocalWebSite(WebSite):
 996    """Walk the local web directory on local disk down from the root.
 997    Clean up temporary files and do other cleanup work."""
 998
 999    def __init__(self, settings):
1000        """Go to web page root and list all files and directories."""
1001
1002        # Initialize the parent class.
1003        WebSite.__init__(self, settings)
1004
1005        self.root_dir = self.get_root_dir()
1006        logging.debug(f"LocalWebSite.__init__():  \tRoot directory: {self.root_dir:s}")
1007
1008    def get_root_dir(self):
1009        """Get the name of the root directory"""
1010        return self.user_settings.local_root_dir
1011
1012    def go_to_root_dir(self, root_dir):
1013        """Go to the root directory"""
1014
1015        # Go to the root directory.
1016        logging.debug(f"LocalWebSite.go_to_root_dir():  \tchdir to root directory:  {root_dir:s}")
1017        os.chdir(root_dir)
1018
1019        # Read it back.
1020        self.root_dir = os.getcwd()
1021        logging.debug(f"LocalWebSite.go_to_root_dir():  \tgetcwd root directory:  {self.root_dir:s}")
1022
1023    def one_level_down(self, d):
1024        """List all files and subdirectories in the current directory, dir.  For files, collect file info
1025        such as time, date and size."""
1026
1027        directories = []
1028        files = []
1029
1030        # Change to current directory.
1031        os.chdir(d)
1032
1033        # List all subdirectories and files.
1034        dir_list = os.listdir(d)
1035
1036        if dir_list:
1037            for line in dir_list:
1038                # Add the full path prefix from the root.
1039                name = self.append_root_dir(d, line)
1040                logging.debug(f"LocalWebSite.one_level_down():  \tlocal dir or file {name:s}")
1041
1042                # Is it a directory or a file?
1043                if os.path.isdir(name):
1044                    directories.append(name)
1045                elif os.path.isfile(name):
1046                    # First assemble the file information of name, time/date and size into a list.
1047                    # Can index it like an array.  For example,
1048                    # file_info = 
1049                    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
1050                    #      1,                                           -- Enum type FileType.FILE = 1.
1051                    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a date/time class.
1052                    #      4675]                                        -- File size in bytes.
1053                    file_info = [name,
1054                                 FileType.FILE,
1055                                 self.get_file_date_time(name),
1056                                 self.get_file_size(name)]
1057                    files.append(file_info)
1058
1059        # Sort the names into order.
1060        if directories:
1061            directories.sort()
1062        if files:
1063            files.sort()
1064
1065        return directories, files
1066
1067    @staticmethod
1068    def get_file_date_time(file_name):
1069        """Get a local file time and date in UTC."""
1070
1071        file_epoch_time = os.path.getmtime(file_name)
1072        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1073        # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1074        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1075        return d
1076
1077    @staticmethod
1078    def get_file_size(file_name):
1079        """Get file size in bytes."""
1080        return os.path.getsize(file_name)
1081
1082    @staticmethod
1083    def clean_up_temp_file(temp_file_name, file_name, changed):
1084        """Remove the original file, rename the temporary file name to the original name.
1085        If there are no changes, just remove the temporary file.
1086        """
1087
1088        if changed:
1089            # Remove the old file now that we have the rewritten file.
1090            try:
1091                os.remove(file_name)
1092                logging.debug(f"Changes were made.  Removed original file {file_name:s}")
1093            except OSError as detail:
1094                logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}.  Need to remove it manually.")
1095
1096            # Rename the new file to the old file name.
1097            try:
1098                os.rename(temp_file_name, file_name)
1099                logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1100            except OSError as detail:
1101                logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}.  Need to rename manually")
1102        else:
1103            # No changes?  Remove the temporary file.
1104            try:
1105                os.remove(temp_file_name)
1106                logging.debug(f"No changes were made.  Removed temporary file {temp_file_name:s}")
1107            except OSError as detail:
1108                logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}.  Need to remove it manually.")
1109        return
1110
1111    @staticmethod
1112    def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1113        """
1114        Process each line of a file with a list of functions.  Create a new temporary file.
1115
1116        The default list is None which means make an exact copy.
1117        """
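        # Illustrative call (hypothetical file names):  passing no rewrite functions makes a verbatim copy,
        # so 'changed' comes back False and the temporary copy can then be deleted by clean_up_temp_file():
        #     changed = LocalWebSite.process_lines_of_file("index.html", "index.html" + UserSettings.TEMP_FILE_EXT)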
1118
1119        # Assume no changes.
1120        changed = False
1121
1122        # Open both input and output files for processing.  Check if we cannot do it.
1123        fin = None
1124        try:
1125            fin = open(in_file_name, "r")
1126        except IOError as detail:
1127            logging.error(f"process_lines_of_file():  \tCannot open file {in_file_name:s} for reading:  {str(detail):s} Aborting...")
1128            if fin is not None:
1129                fin.close()
1130            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1131        fout = None
1132        try:
1133            fout = open(out_file_name, "w")
1134        except IOError as detail:
1135            logging.error(f"process_lines_of_file():  \tCannot open file {out_file_name:s} for writing:  {str(detail):s} Aborting...")
1136            if fout is not None:
1137                fout.close()
1138            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1139
1140        # Read each line of the file, aborting if there is a read error.
1141        try:
1142            line = fin.readline()
1143
1144            # Rewrite the next line of the file using all the rewrite functions.
1145            while line:
1146                original_line = line
1147                # If we have one or more rewrite functions...
1148                if process_line_function_list is not None:
                    # ...apply each rewrite function to the line, one after the other, in order.
1150                    for processLineFunction in process_line_function_list:
1151                        if processLineFunction is not None:
1152                            line = processLineFunction(line)
1153
1154                if original_line != line:
1155                    logging.debug(f"Rewrote the line:    >>>{original_line:s}<<< into >>>{line:s}<<< for file {in_file_name:s}")
1156                    changed = True
1157
1158                fout.write(line)
1159
1160                line = fin.readline()
1161
1162            fin.close()
1163            fout.close()
1164        except IOError as detail:
1165            logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s}  Aborting...")
1166            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1167
1168        if changed:
            logging.debug(f"process_lines_of_file():  \tRewrote original file {in_file_name:s}.  "
                          f"Changes are in temporary copy {out_file_name:s}")
1171
1172        # Return True if any lines were changed.
1173        return changed
1174
1175    def clean(self):
1176        """Scan through all directories and files in the local on disk website and clean them up."""
1177
1178        num_source_files_changed = 0
1179        num_source_files_syntax_highlighted = 0
1180
1181        logging.debug("Cleaning up the local web page.")
1182
1183        if self.directories is None or self.files is None:
1184            logging.error("Web site has no directories or files.  Aborting...")
1185            raise UpdateWebException("Internal error for cleaning up the local web site.  See the log file for details.  Aborting... ")
1186
1187        for d in self.directories:
1188
1189            if self.is_temp_dir(d):
1190                # Add the full path prefix from the root.
1191                name = self.append_root_dir(self.get_root_dir(), d)
1192                try:
                    logging.debug(f"Removing temp dir {name:s} recursively")
1194                    shutil.rmtree(name)
1195                except OSError as detail:
1196                    logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1197
1198        for f in self.files:
1199            # Add the full path prefix from the root.
1200            full_file_name = self.append_root_dir(
1201                self.get_root_dir(), f[self.user_settings.FILE_NAME])
1202
1203            # Remove all temporary files.
1204            if self.is_temp_file(f):
1205                try:
1206                    logging.debug(f"Removing temp file {full_file_name:s}")
1207                    os.remove(full_file_name)
1208                except OSError as detail:
                    logging.error(f"Cannot remove temp file {full_file_name:s}: {str(detail):s}")
1210
1211            # Update source code files.
1212            if self.is_source_or_hypertext_file(f):
1213                changed = self.rewrite_source_file(full_file_name)
1214                if changed:
1215                    num_source_files_changed += 1
                    logging.debug(f"Rewrote source code file {full_file_name:s}")
1217
            # Generate a syntax highlighted code listing.
            # Give it the same time and date as the original code so that only files with recent changes get uploaded to the remote server.
1220            if self.is_file_to_syntax_highlight(f):
1221                # syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1222                syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name)
                if syntax_highlighted_file_name is not None:
                    logging.debug(f"Generated a syntax highlighted source listing file {syntax_highlighted_file_name:s} for the file {full_file_name:s}")
                    num_source_files_syntax_highlighted += 1
                else:
                    logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1228
1229        logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1230        logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1231
1232    def is_temp_file(self, file_info):
1233        """Identify a file name as a temporary file"""
1234
1235        file_name = file_info[self.user_settings.FILE_NAME]
1236
        # Suffixes and names for temporary files to be deleted.
1238        pat = self.user_settings.TEMP_FILE_SUFFIXES
1239        match = pat.search(file_name)
1240        # Remove any files containing twiddles anywhere in the name.
1241        if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1242            return True
1243
1244        return False
1245
1246    def is_temp_dir(self, dir_name):
1247        """Identify a name as a temporary directory."""
1248
1249        p = self.user_settings.TEMP_DIR_SUFFIX
        return p.search(dir_name) is not None
1251
1252    def is_source_or_hypertext_file(self, file_info):
1253        """ Check if the file name is a source file or a hypertext file."""
1254
1255        file_name = file_info[self.user_settings.FILE_NAME]
1256        p1 = self.user_settings.SOURCE_FILE_PATTERN
1257        p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1258        if p1.search(file_name) or p2.search(file_name):
1259            return True
1260        else:
1261            return False
1262
1263    def is_file_to_syntax_highlight(self, file_info):
1264        """Check if this file type should have a syntax highlighted source listing."""
1265
1266        # Take apart the file name.
1267        full_file_name = file_info[self.user_settings.FILE_NAME]
1268        file_name = Path(full_file_name).name
1269
1270        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1271        if p.search(file_name):
1272            return True
1273        else:
1274            return False
1275
    def rewrite_substring(self, line):
        """Rewrite a line containing any of the string replacement patterns given in the user settings."""
1278
1279        # Start with the original unchanged line.
1280        rewritten_line = line
1281
1282        # Do the replacements in order from first to last.
1283        for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1284            # Get the next pattern match replacement string tuple.
1285            [pat, rep_string] = match_replace_tuple
1286            # Does it match?  Then do string substitution, else leave the line unchanged.
1287            match = pat.search(rewritten_line)
1288            if match:
                # Now we have these cases:
                #     - No capture variables at all:  just a straightforward pattern match followed by a string substitution.
                #     - One or more capture variable names in the pattern (?P<varname> ... ) along with the corresponding match group names \g<varname> in the replacement string.
                #       If pat.sub() finds any inconsistency, such as a capture variable name which doesn't match a group name, it will throw an exception.
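                # Illustrative example (hypothetical pattern and replacement;  the real list comes from updateweb.yaml):
                #     pat        = re.compile(r"(?P<scheme>http)://example\.com")
                #     rep_string = r"\g<scheme>s://example.com"
                #     pat.sub(rep_string, "see http://example.com")   yields   "see https://example.com"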
1293                try:
1294                    sub = pat.sub(rep_string, rewritten_line)
1295                    rewritten_line = sub
1296                except IndexError as detail:
1297                    logging.error(f"ERROR: {str(detail):s}.  Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml.    Did not rewrite the line |{rewritten_line:s}|")
1298 
1299        return rewritten_line
1300
1301    def rewrite_email_address_line(self, line):
1302        """Rewrite lines containing old email addresses."""
1303
1304        # Search for the old email address.
1305        pat = self.user_settings.OLD_EMAIL_ADDRESS
1306        match = pat.search(line)
1307
1308        # Replace the old address with my new email address.
1309        if match:
1310            new_address = self.user_settings.NEW_EMAIL_ADDRESS
1311            sub = pat.sub(new_address, line)
1312            line = sub
1313
1314        return line
1315
1316    def rewrite_copyright_line(self, line):
1317        """Rewrite copyright lines if they are out of date."""
1318
1319        # Match the lines,
1320        #     Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1321        #     Copyright &copy; nnnn-mmmm by Sean Erik O'Connor.
1322        # and pull out the old year and save it.
1323        pat = self.user_settings.COPYRIGHT_LINE
1324        match = pat.search(line)
1325
1326        # Found a match.
1327        if match:
1328            old_year = int(match.group('old_year'))
1329
1330            # Replace the old year with the current year.
1331            # We matched and extracted the old copyright symbol into the variable
1332            # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
1333            # We now insert it back by placing the special syntax \g<symbol>
1334            # into the replacement string.
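            # Illustrative example of this mechanism (hypothetical pattern;  the real COPYRIGHT_LINE regex is defined in the user settings):
            #     re.sub(r"Copyright (?P<symbol>\(C\)|&copy;) (?P<old_year>\d{4})-\d{4}",
            #            r"Copyright \g<symbol> \g<old_year>-2025",
            #            "Copyright (C) 2007-2024")   yields   "Copyright (C) 2007-2025"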
1335            if old_year < WebSite.get_current_year():
1336                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(WebSite.get_current_year()) + " by Sean Erik"
1337                sub = pat.sub(new_copyright, line)
1338                line = sub
1339        return line
1340
1341    def rewrite_last_update_line(self, line):
1342        """Rewrite the Last Updated line if the year is out of date."""
1343
1344        # Match the last updated line and pull out the year.
1345        #      last updated 01 Jan 25.
1346        p = self.user_settings.LAST_UPDATED_LINE
1347        m = p.search(line)
1348
1349        if m:
1350            last_update_year = int(m.group('year'))
1351
1352            # Convert to four digit years.
1353            if last_update_year > 90:
1354                last_update_year += 1900
1355            else:
1356                last_update_year += 2000
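            # e.g. a two-digit year of 97 becomes 1997, while 07 and 25 become 2007 and 2025.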
1357
1358            # If the year is old, rewrite to "01 Jan <current year>".
1359            if last_update_year < WebSite.get_current_year():
1360                two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1361                sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1362                line = sub
1363
1364        return line
1365
1366    def rewrite_source_file(self, file_name):
1367        """Rewrite copyright lines, last updated lines, etc."""
1368        changed = False
1369
1370        # Create a new temporary file name for the rewritten file.
1371        temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1372
1373        # Apply changes to all lines of the temporary file.  Apply change functions in
1374        # the sequence listed.
1375        if self.process_lines_of_file(file_name, temp_file_name,
1376                                      [self.rewrite_copyright_line,
1377                                       self.rewrite_last_update_line,
1378                                       self.rewrite_email_address_line,
1379                                       self.rewrite_substring]):
1380            logging.debug(f"Changed (rewritten) source file {file_name:s}")
1381            changed = True
1382
1383        # Rename the temporary file to the original file name.  If no changes, just delete the temp file.
1384        self.clean_up_temp_file(temp_file_name, file_name, changed)
1385
1386        return changed
1387
1388    @staticmethod
1389    def create_syntax_highlighted_code_listing(source_file_name, **kwargs):
1390        """Create a syntax highlighted source listing for the file and return its name.  Return None if there is an error.
1391        Keep the same date/time as the original file."""
1392
        # kwargs is a dictionary of optional keyword arguments.  The only one we look for is the
        # boolean flag dry_run, which defaults to False.
        dry_run = kwargs.get('dry_run') is True
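        # For example, calling create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
        # (as in the commented-out call in clean() above) only logs what would have been generated
        # and returns None without writing any file.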
1403
1404        # Take apart the file name.
1405        file_name_without_extension = Path(source_file_name).stem
1406        file_extension = Path(source_file_name).suffix
1407
1408        # Append *.html to the source code file name.  This will be the syntax highlighted code listing.
1409        syntax_highlighted_file_name = f"{source_file_name}.html"
1410
1411        # In the special case of Jupyter notebooks, use the Jupyter to HTML converter.
1412        if file_extension == ".ipynb":
1413            if dry_run:
1414                logging.debug(f"Dry run only:  don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1415                return None
1416            # Python manual recommends using the run() command instead of Popen().  See https://docs.python.org/3/library/subprocess.html#subprocess.run
1417            try:
1418                shell_command = f"jupyter nbconvert {source_file_name} --to html --output {syntax_highlighted_file_name}"
                # Throw an exception if we can't run the process.
                # Capture the standard output and standard error and dump them to /dev/null so they don't print to the command line when running this script.
                # Since the shell command is a single string, pass it directly and use shell=True in the run() command.
                subprocess.run(shell_command, shell=True, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
1423            except subprocess.CalledProcessError as detail: 
1424                logging.error(f"Cannot convert the Jupyter file {source_file_name:s} to a syntax highlighted file: {str(detail):s}  Aborting...")
1425                return None
1426        # Otherwise, use the Pygments syntax highlighter.
1427        else:
            # First choose the language lexer from the file name itself if there's no extension.
            # Dot files such as .bashrc have no suffix, so the stem is the entire file name.
1430            match file_name_without_extension:
1431                case "makefile":
1432                    lexer = MakefileLexer()
                case ".bash_profile" | ".bashrc" | ".bash_logout":
1434                    lexer = BashLexer()
1435                case ".vimrc":
1436                    lexer = VimLexer()
1437                case ".gitignore_global" | ".gitignore" | ".gitconfig":
1438                    lexer = OutputLexer() # No formatting.
1439                case _:
1440                    # Choose the language lexer from the file extension.  Web stuff first, then programming languages.
1441                    match file_extension:
1442                        case ".html":
1443                            lexer = HtmlLexer()
1444                        case ".css":
1445                            lexer = CssLexer()
1446                        case ".js":
1447                            lexer = JavascriptLexer()
1448                        case ".sh":
1449                            lexer = BashLexer()
1450                        case ".py":
1451                            lexer = PythonLexer()
1452                        case ".c" | ".h":
1453                            lexer = CLexer()
1454                        case ".hpp" | ".cpp":
1455                            lexer = CppLexer()
1456                        case ".lsp":
1457                            lexer = CommonLispLexer()
1458                        case ".for" | ".FOR" | ".f":
1459                            lexer = FortranFixedLexer()  # Fixed format FORTRAN, not FORTRAN 90.
1460                        case ".txt" | ".dat":            # Generic data file;  no formatting.
1461                            lexer = OutputLexer()
1462                        case ".tex":
1463                            lexer = TexLexer()           # LaTeX, TeX, or related files.
1464                        case ".m":
1465                            lexer = MatlabLexer()
1466                        case ".yaml":
1467                            lexer = YamlLexer()
1468                        case _:
1469                            logging.error(f"Can't find a lexer for file {source_file_name}.  Cannot generate a syntax highlighted source listing.  Aborting...")
1470                            return None
1471
1472            # Read the source code file into a single string.
1473            try:
1474                with open(source_file_name, 'r') as fp:
1475                    source_file_string = fp.read()
            except OSError as detail:
                logging.error(f"Cannot read the source code file {source_file_name:s} for syntax highlighting: {str(detail):s}  Aborting...")
                return None
1478
1479            # Top level Pygments function generates the HTML for the highlighted code.
1480            highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1481
1482            # The style sheet is always the same for all languages.
1483            style_sheet = HtmlFormatter().get_style_defs('.highlight')
1484
1485            # Write out the syntax colored file.
1486            if dry_run:
1487                logging.debug(f"Dry run only:  don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1488                return None
1489            else:
1490                try:
1491                    # Write out the highlighted code listing in HTML with CSS style sheet attached.
1492                    with open(syntax_highlighted_file_name, 'w') as fp:
1493                        fp.write(UserSettings.BASIC_HTML_BEGIN)
1494                        fp.write(style_sheet)
1495                        fp.write(UserSettings.BASIC_HTML_MIDDLE)
1496                        fp.write(highlighted_html_source_file_string)
1497                        fp.write(UserSettings.BASIC_HTML_END)
1498                except OSError as detail:
1499                    logging.error(f"Cannot write the syntax highlighted file {syntax_highlighted_file_name:s}: {str(detail):s}  Aborting...")
1500        # ------- end Pygments syntax highlighter
1501
1502        # Set the syntax highlighted code file to the same modification and access time and date as the source file.
1503        file_stat = os.stat(source_file_name)
1504        os.utime(syntax_highlighted_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1505
        # Are the original source and the syntax highlighted code the same date and time?
        dates_and_times_source_file_name             = LocalWebSite.get_file_date_time(source_file_name)
        dates_and_times_syntax_highlighted_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1509        if dates_and_times_source_file_name != dates_and_times_syntax_highlighted_file_name:
1510            logging.error(f"Source code and syntax highlighted source don't have the same times.  source time = {dates_and_times_source_file_name.ctime():s} syntax highlighted time = {dates_and_times_syntax_highlighted_file_name.ctime():s} Aborting...")
1511            return None
1512
1513        logging.debug(f"Generated a syntax highlighted listing {syntax_highlighted_file_name:s} for the source code file {source_file_name:s} with the same time and date = {dates_and_times_source_file_name.ctime():s}")
1514        return syntax_highlighted_file_name
1515
1516# ----------------------------------------------------------------------------
1517#   Subclass which knows about the remote web site.
1518# ----------------------------------------------------------------------------
1519
1520class RemoteWebSite(WebSite):
1521    """Walk the remote web directory on a web server down from the root.
1522       Use FTP commands:
1523           https://en.wikipedia.org/wiki/List_of_FTP_commands
1524       Use the Python ftp library:
1525           https://docs.python.org/3/library/ftplib.html
1526    """
1527
1528    def __init__(self, user_settings):
1529        """Connect to FTP server and list all files and directories."""
1530
1531        # Root directory of FTP server.
1532        self.root_dir = user_settings.FTP_ROOT_NAME
1533        logging.debug(f"Set the remote web site ftp root dir = {self.root_dir:s}")
1534
1535        # Connect to FTP server and log in.
1536        try:
1537            # self.ftp.set_debuglevel( 2 )
1538            # print( f"\nTrying ftp login to server name = {user_settings.SERVER_NAME} user name = {user_settings.USER_NAME} password =  {user_settings.PASSWORD_NAME}\n")
1539            self.ftp = ftplib.FTP(user_settings.SERVER_NAME)
1540            self.ftp.login(user_settings.USER_NAME, user_settings.PASSWORD_NAME)
1541        # Catch all exceptions with the parent class Exception:  all built-in,
1542        # non-system-exiting exceptions are derived from this class.
1543        except Exception as detail:
1544            # Extract the string message from the exception class with str().
1545            logging.error(f"Remote web site cannot login to ftp server: {str(detail):s}  Aborting...")
1546            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1547        else:
1548            logging.debug("Remote web site ftp login succeeded.")
1549
1550        logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")
1551
1552        # Initialize the superclass.
1553        WebSite.__init__(self, user_settings)
1554
1555    def go_to_root_dir(self, root_dir):
1556        """Go to the root directory"""
1557
1558        try:
1559            # Go to the root directory.
1560            self.ftp.cwd(root_dir)
1561            logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")
1562
1563            # Read it back.
1564            self.root_dir = self.ftp.pwd()
1565            logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")
1566
1567        except Exception as detail:
1568            logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
1569            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1570
1571    def get_root_dir(self):
1572        """Get the root directory name"""
1573
1574        return self.root_dir
1575
1576    def finish(self):
1577        """Quit remote web site"""
1578        logging.debug(f"Finished with WebSite object of class {type(self)}")
1579        try:
1580            self.ftp.quit()
1581        except Exception as detail:
1582            logging.error(f"Cannot ftp quit: {str(detail):s}")
1583
1584    def one_level_down(self, d):
1585        """List files and directories in a subdirectory using ftp"""
1586
1587        directories = []
1588        files = []
1589
1590        try:
1591            # ftp listing from current dir.
1592            logging.debug(f"RemoteWebSite.one_level_down():  \tftp cwd: {d:s}")
1593            self.ftp.cwd(d)
1594            dir_list = []
1595
1596            # Use the nonstandard -a option in LIST to show all the hidden .* files.
1597            # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
1598            # Note the second argument requires a callback function.
1599            self.ftp.retrlines('LIST -a', dir_list.append)
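            # After this call dir_list holds one raw text line per directory entry, e.g. (using the
            # formats shown in parse_ftp_list() below):
            #     drwxr-xr-x    2 1000       free             4096 Jul 18 16:36 ReadingList
            #     -rw-r--r--    1 1000       free             4084 Jul 18 16:55 sparkCoil.png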
1600
1601        except Exception as detail:
1602            logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}:  {str(detail):s} Aborting...")
1603            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1604
1605        for line in dir_list:
1606            logging.debug(f"RemoteWebSite.one_level_down():  \tftp LIST: {line:s}")
1607
1608            # Line should at least have the minimum FTP information.
1609            if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
1610                # Parse the FTP LIST and put the pieces into file_info.
1611                file_info = self.parse_ftp_list(line)
1612                logging.debug(f"RemoteWebSite.one_level_down():  \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")
1613
                # Skip over the UNIX current and parent directory entries . and ..  Also skip over any NULL (empty) file names.
1615                if file_info[self.user_settings.FILE_NAME] == "" or file_info[self.user_settings.FILE_NAME] == "." or file_info[self.user_settings.FILE_NAME] == "..":
1616                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
1617                    pass
                # For a directory:  add the full path prefix from the root to the directory name and append it to the directory list.
1619                elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
1620                    dirname = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1621                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp dir (full path): {dirname:s}")
1622                    directories.append(dirname)
1623                # For a file:  Add the full path prefix from the root to the file name.
1624                else:
1625                    file_info[self.user_settings.FILE_NAME] = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp file (full path): "
                                  f"{file_info[self.user_settings.FILE_NAME]:s}")
1628                    files.append(file_info)
1629            else:
1630                logging.error(f"RemoteWebSite.one_level_down():  \tFTP LIST line is too short:  {line:s}")
1631
1632        directories.sort()
1633        files.sort()
1634
1635        return directories, files
1636
1637    def modtime(self, f):
1638        """Get the modification time of a file via ftp.  Return 0 if ftp cannot get it."""
1639        modtime = 0
1640
1641        try:
1642            response = self.ftp.sendcmd('MDTM ' + f)
            # MDTM returns the last modified time of the file in the format
            #     "213 YYYYMMDDhhmmss" terminated by \r\n, or an error response.
            # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, and ss is 00 to 59.
            # The error response is 550 if the information is not available, and 500 or 501 if the
            # command cannot be parsed.
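            # Illustrative example (not an actual server response):  a reply of
            #     "213 20250118165500"
            # means the file was last modified on 2025-01-18 at 16:55:00, and response[4:] extracts "20250118165500".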
1648            if response[:3] == '213':
1649                modtime = response[4:]
1650        except ftplib.error_perm as detail:
1651            logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Aborting...")
1652            modtime = 0
1653
1654        return modtime
1655
1656    def parse_ftp_list(self, line):
1657        """Parse the ftp file listing and return file name, datetime and file size.
1658
1659           An FTP LIST command will give output which looks like this for a file:
1660
1661               -rw-r--r--    1 1000       free             4084 Jul 18 16:55 sparkCoil.png
1662
1663           and for a directory:
1664
1665                drwxr-xr-x    2 1000       free             4096 Jul 18 16:36 ReadingList
1666
1667           FTP uses UTC for its listings; the conversion to local time is done by the OS.
1668           We can have problems on New Year's Eve.  For example, the local file date/time is
1669
1670              Mon Jan  1 06:23:12 2018
1671
1672           But the remote file date/time from FTP listing doesn't show a year even though we
1673           know it was written to the server in 2017.
1674
1675               Mon Dec 31 03:02:00
1676
1677           So we default the remote file year to current year 2018 and get
1678
1679               Mon Dec 31 03:02:00 2018
1680
1681           Now we think that the remote file is newer by 363.860278 days.
1682        """
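        # For the sparkCoil.png example in the docstring above, the parsed result is approximately
        #     ["sparkCoil.png", FileType.FILE, datetime.datetime(<current year>, 7, 18, 16, 55, 0), 4084]
        # where the year is defaulted as described above because the listing omits it.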
1683
1684        # Find out if we've a directory or a file.
1685        if line[0] == 'd':
1686            dir_or_file = FileType.DIRECTORY
1687        else:
1688            dir_or_file = FileType.FILE
1689
1690        pattern = self.user_settings.FTP_LISTING
1691
1692        # Sensible defaults.
1693        filesize = 0
1694        filename = ""
1695        # Default the time to midnight.
1696        hour = 0
1697        minute = 0
1698        seconds = 0
1699        # Default the date to Jan 1 of the current year.
1700        month = 1
1701        day = 1
1702        year = WebSite.get_current_year()
1703
1704        # Extract time and date from the ftp listing.
1705        match = pattern.search(line)
1706
1707        if match:
1708            filesize = int(match.group('bytes'))
1709            month = self.user_settings.monthToNumber[match.group('mon')]
1710            day = int(match.group('day'))
1711
1712            # Remote file listing contains the year.  The FTP listing will omit the hour and minute.
1713            if match.group('year'):
1714                year = int(match.group('year'))
1715                logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
1716            else:
1717                # Remote file listing omits the year.  Default the year to the current UTC time year.
1718                # That may be incorrect (see comments above).
1719                year = WebSite.get_current_year()
1720                logging.debug(f"ftp is missing the year;  use the current year = {year}")
1721
1722            # If the FTP listing has the hour and minute, it will omit the year.
1723            if match.group('hour') and match.group('min'):
1724                hour = int(match.group('hour'))
1725                minute = int(match.group('min'))
1726                logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")
1727
1728            filename = match.group('filename')
1729
1730        # Package up the time and date nicely.
1731        # Note if we didn't get any matches, we'll default the remote date and
1732        # time to Jan 1 midnight of the current year.
1733        d = datetime.datetime(year, month, day, hour, minute, seconds)
1734
1735        return [filename, dir_or_file, d, filesize]
1736
1737# ----------------------------------------------------------------------------
1738#  Class for synchronizing local and remote web sites.
1739# ----------------------------------------------------------------------------
1740
1741class UpdateWeb(object):
1742    """Given previously scanned local and remote directories, update the remote website."""
1743
1744    def __init__(
1745            self,
1746            user_settings,
1747            local_directory_list,
1748            local_file_info,
1749            remote_directory_list,
1750            remote_file_info):
1751        """Connect to remote site.  Accept previously scanned local and remote files and directories."""
1752
1753        # Initialize from args.
1754        self.user_settings = user_settings
1755        self.local_directory_list = local_directory_list
1756        self.remote_directory_list = remote_directory_list
1757        self.local_file_info = local_file_info
1758        self.remote_file_info = remote_file_info
1759
1760        # Initialize defaults.
1761        self.local_files_list = []
1762        self.remote_files_list = []
1763        self.local_file_to_size = {}
1764        self.local_file_to_date_time = {}
1765        self.remote_file_to_date_time = {}
1766        self.local_only_dirs = []
1767        self.local_only_files = []
1768        self.remote_only_dirs = []
1769        self.remote_only_files = []
1770        self.common_files = []
1771
1772        # Connect to FTP server and log in.
1773        try:
1774            self.ftp = ftplib.FTP(self.user_settings.SERVER_NAME)
1775            self.ftp.login(self.user_settings.USER_NAME, self.user_settings.PASSWORD_NAME)
1776        except Exception as detail:
1777            logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
1778            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1779        else:
1780            logging.debug("ftp login succeeded.")
1781
1782        logging.debug(f"ftp server welcome message:  {self.ftp.getwelcome():s}")
1783
1784        # Local root directory.
1785        self.local_root_dir = self.user_settings.local_root_dir
1786        logging.debug(f"Local root directory: {self.local_root_dir:s}")
1787
1788        # Root directory of FTP server.
1789        self.ftp_root_dir = self.user_settings.FTP_ROOT_NAME
1790        logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")
1791
        # Transform KB string to integer bytes.  e.g. "200" => 204800
1793        self.file_size_limit = int(self.user_settings.FILE_SIZE_LIMIT_NAME) * 1024
1794
1795        try:
1796            # Go to the root directory.
1797            self.ftp.cwd(self.ftp_root_dir)
1798
1799            # Read it back.
1800            self.ftp_root_dir = self.ftp.pwd()
1801            logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
1802        except Exception as detail:
1803            logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")
1804
1805    def append_root_dir(self, root_dir, name):
1806        """Append the root directory to a path"""
1807
1808        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1809        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1810        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
1811            return root_dir + name
1812        else:
1813            return root_dir + "/" + name
1814
1815    def file_info(self):
1816        """Create lists of file names from the file information.  Also create dictionaries which map file names onto
1817        dates, times, and sizes."""
1818
1819        # Extract file names.
1820        self.local_files_list = [
1821            file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
1822        self.remote_files_list = [
1823            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]
1824
1825        # Use a dictionary comprehension to create key/value pairs, 
1826        #     (file name, file date/time)
1827        # which map file names onto date/time.
1828        self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
1829        self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}
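        # Each of these dictionaries maps a file name onto its date/time, e.g. a (hypothetical) entry
        # might look like  {"Art/foo.txt": datetime.datetime(2025, 1, 18, 16, 55)}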
1830
1831        # Dictionary comprehension creates a mapping of local file names onto file sizes.
1832        self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}
1833
1834    def update(self):
1835        """Scan through the local website, cleaning it up.
1836        Go to remote website on my servers and synchronize all files."""
1837
1838        self.file_info()
1839
        # Find which files and directories are different.
1841        self.changes()
1842
        # Synchronize the remote web site with the local web site.
1844        self.synchronize()
1845
1846    def changes(self):
1847        """Find the set of different directories and files on local and remote."""
1848
1849        # Add all directories which are only on local to the dictionary.
1850        dir_to_type = {
1851            d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}
1852
1853        # Scan through all remote directories, adding those only on remote or
1854        # on both.
1855        for d in self.remote_directory_list:
1856            if d in dir_to_type:
1857                dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1858            else:
1859                dir_to_type[d] = FileType.ON_REMOTE_ONLY
1860
1861        # Add all files which are only on local to the dictionary.
1862        file_to_type = {
1863            f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}
1864
1865        # Scan through all remote files, adding those only on remote or on
1866        # both.
1867        for f in self.remote_files_list:
1868            if f in file_to_type:
1869                file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1870            else:
1871                file_to_type[f] = FileType.ON_REMOTE_ONLY
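        # At this point each dictionary maps a path onto where it lives, e.g. (hypothetical names)
        #     dir_to_type  = {"Art": FileType.ON_BOTH_LOCAL_AND_REMOTE, "Blog": FileType.ON_LOCAL_ONLY}
        #     file_to_type = {"Art/foo.txt": FileType.ON_REMOTE_ONLY, ...}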
1872
1873        logging.debug("Raw dictionary dump of directories")
1874        for k, v in dir_to_type.items():
1875            logging.debug(f"\t dir:  {str(k):s}  type: {str(v):s}")
1876
1877        logging.debug("Raw dictionary dump of files")
1878        for k, v in file_to_type.items():
1879            logging.debug(f"\t file: {str(k):s}  type: {str(v):s}")
1880
1881        # List of directories only on local.  Keep the ordering.
1882        self.local_only_dirs = [
1883            d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]
1884
1885        # List of directories only on remote.  Keep the ordering.
1886        self.remote_only_dirs = [
1887            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]
1888
1889        # We don't care about common directories, only their changed files, if
1890        # any.
1891
1892        # List of files only on local.  Keep the ordering.
1893        self.local_only_files = [
1894            f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]
1895
1896        # List of files only on remote.  Keep the ordering.
1897        self.remote_only_files = [
1898            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]
1899
1900        # List of common files on both local and remote.  Keep the ordering.
1901        self.common_files = [
1902            f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]
1903
1904        logging.debug("*** Directories only on local ******************************")
1905        for d in self.local_only_dirs:
1906            logging.debug(f"\t {d:s}")
1907
1908        logging.debug("*** Directories only on remote ******************************")
1909        for d in self.remote_only_dirs:
1910            logging.debug(f"\t {d:s}")
1911
1912        logging.debug("*** Files only on local ******************************")
1913        for f in self.local_only_files:
1914            logging.debug(f"\t {f:s}")
1915
1916        logging.debug("*** Files only on remote ******************************")
1917        for f in self.remote_only_files:
1918            logging.debug(f"\t {f:s}")
1919
1920        logging.debug("*** Common files ******************************")
1921        for f in self.common_files:
1922            logging.debug(f"name {f:s}")
1923            logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
1924            logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")
1925
1926    def synchronize(self):
1927        """Synchronize files and subdirectories in the remote directory with the local directory."""
1928
1929        # If we have the same files in local and remote, compare their times
1930        # and dates.
1931        for f in self.common_files:
1932            local_file_time = self.local_file_to_date_time[f]
1933            remote_file_time = self.remote_file_to_date_time[f]
1934
1935            # What's the time difference?
1936            time_delta = remote_file_time - local_file_time
1937            # How much difference, either earlier or later?
1938            seconds_different = abs(time_delta.total_seconds())
1939            minutes_different = seconds_different / 60.0
1940            hours_different = minutes_different / 60.0
1941            days_different = hours_different / 24.0
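            # e.g. a remote file 90 seconds newer than the local copy gives seconds_different = 90.0,
            # minutes_different = 1.5, hours_different = 0.025, and days_different of about 0.00104.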
1942
1943            # Assume no upload initially.
1944            upload_to_host = False
1945
1946            logging.debug(f"Common file:  {f:s}.")
1947
1948            # Remote file time is newer.
            # Allow for long file names (around 200 characters), e.g.
            #     Mathematics/AbstractAlgebra/PrimitivePolynomials/Project/Build/PrimpolyXCode/Primpoly/Primpoly.xcodeproj/project.xcworkspace/xcuserdata/seanoconnor.xcuserdatad/UserInterfaceState.xcuserstate
1951
1952            if remote_file_time > local_file_time:
                # Remote file time is MUCH newer:  suspect the time is out of joint on the server, so upload the local file to be safe.
1954                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
1955                    logging.error(f"Remote file {f:s} is MUCH newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes]. Upload the file to be safe.")
1956                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
1957                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
1958
1959                    # Set the local file to the current time.
1960                    full_file_name = self.append_root_dir(
1961                        self.local_root_dir, f)
1962                    if os.path.exists(full_file_name):
1963                        # Change the access and modify times of the file to the current time.
1964                        os.utime(full_file_name, None)
1965                        logging.error(f"Touching local file {full_file_name:s} to make it the current time")
1966
1967                    upload_to_host = True
1968                # Remote file time is newer, but not by much.  Let's just assume a slight time mismatch on the server.  Don't upload.
1969                else:
1970                    logging.warning(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
1971                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1972                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1973                    upload_to_host = False
1974
1975            # Local file time is newer.
1976            elif local_file_time > remote_file_time:
1977                # Local file time slightly newer than the remote file.  So we are pretty sure the local file really got changed vs the server file.
1978                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
                    logging.warning(f"Local file {f:20s} is newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD} minutes].  Uploading to remote server.")
1980                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1981                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1982                    upload_to_host = True
1983                else:
1984                    logging.warning(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
1985                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1986                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1987                    upload_to_host = False
1988
1989            # Cancel the upload if the file is too big for the server.
1990            size = self.local_file_to_size[f]
1991            if size >= self.file_size_limit:
1992                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
1993                upload_to_host = False
1994
1995            # Finally do the file upload.
1996            if upload_to_host:
1997                logging.debug(f"Uploading changed file {f:s}")
1998                # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
1999                print(f"Uploading changed file {f:s}...  ", end='', flush=True)
2000                self.upload(f)
2001
2002        # Remote directory is not in local.  Delete it.
2003        for d in self.remote_only_dirs:
2004            logging.debug(f"Deleting remote only directory {d:s}")
2005            print(f"Deleting remote only directory {d:s}...  ", end='', flush=True)
2006            self.rmdir(d)
2007
2008        # Local directory missing on remote.  Create it.
2009        # Due to breadth first order scan, we'll create parent directories
2010        # before child directories.
2011        for d in self.local_only_dirs:
2012            logging.debug(f"Only on local.  Creating new remote dir {d:s}.")
2013            print(f"Creating new remote directory {d:s}...  ", end='', flush=True)
2014            self.mkdir(d)
2015
2016        # Local file missing on remote.  Upload it.
2017        for f in self.local_only_files:
2018            logging.debug(f"Local only file.  Uploading {f:s} to remote.")
2019
2020            #  But cancel the upload if the file is too big for the server.
2021            size = self.local_file_to_size[f]
2022            if size >= self.file_size_limit:
2023                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d};"
2024                              f" too large for server, limit is {self.file_size_limit:d} bytes")
2025            else:
2026                logging.debug(f"Uploading new file {f:s}")
2027                print(f"Uploading new file {f:s}...  ", end='', flush=True)
2028                self.upload(f)
2029
2030        # Remote contains a file not present on the local.  Delete the file.
2031        for f in self.remote_only_files:
2032            logging.debug(f"Remote only file.  Deleting remote file {f:s}.")
2033            print(f"Deleting remote file {f:s}...  ", end='', flush=True)
2034            self.del_remote(f)
2035
2036    def del_remote(self, relative_file_path):
2037        """Delete a file using ftp."""
2038
2039        logging.debug(f"del_remote():  \trelative file path name: {relative_file_path:s}")
2040
2041        # Parse the relative file path into file name and relative directory.
2042        relative_dir, file_name = os.path.split(relative_file_path)
2043        logging.debug(f"del_remote():  \tfile name: {file_name:s}")
2044        logging.debug(f"del_remote():  \trelative dir: {relative_dir:s}")
2045        logging.debug(f"del_remote():  \tremote root dir: {self.ftp_root_dir:s}")
2046
2047        try:
2048            # Add the remote root path and go to the remote directory.
2049            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2050            logging.debug(f"del_remote():  \tftp cd remote dir: {remote_dir:s}")
2051            self.ftp.cwd(remote_dir)
2052        except Exception as detail:
2053            logging.error(f"del_remote():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2054        else:
2055            try:
2056                logging.debug(f"del_remote():  \tftp rm: {file_name:s}")
2057
2058                # Don't remove zero length file names.
2059                if len(file_name) > 0:
2060                    self.ftp.delete(file_name)
2061                else:
                    logging.warning(
                        f"del_remote():  skipping ftp delete;  file name {file_name:s} had zero length")
2064            except Exception as detail:
2065                logging.error(f"del_remote():  \tCannot ftp rm: {str(detail):s}")
2066
2067    def mkdir(self, relative_dir):
2068        """Create new remote directory using ftp."""
2069
2070        logging.debug(f"mkdir():  \trelative dir path name: {relative_dir:s}")
2071        logging.debug(f"mkdir():  \tremote root dir: {self.ftp_root_dir:s}")
2072
2073        # Parse the relative dir path into prefix dir and suffix dir.
2074        path, d = os.path.split(relative_dir)
2075        logging.debug(f"mkdir():  \tremote prefix dir: {path:s}")
2076        logging.debug(f"mkdir():  \tremote dir:  {d:s}")
2077
2078        try:
2079            # Add the remote root path and go to the remote directory.
2080            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2081            logging.debug(f"mkdir():  \tftp cd remote dir: {remote_dir:s}")
2082            self.ftp.cwd(remote_dir)
2083        except Exception as detail:
            logging.error(f"mkdir():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2085        else:
2086            try:
2087                logging.debug(f"mkdir():  \tftp mkd: {d:s}")
2088                self.ftp.mkd(d)
2089            except Exception as detail:
2090                logging.error(f"mkdir():  \tCannot ftp mkdir: {str(detail):s}")
2091
2092    def rmdir(self, relative_dir):
2093        """Delete an empty directory using ftp."""
2094
2095        logging.debug(f"rmdir():  \tintermediate dir path name: {relative_dir:s}")
2096        logging.debug(f"rmdir():  \tremote root dir: {self.ftp_root_dir:s}")
2097
2098        # Parse the relative dir path into prefix dir and suffix dir.
2099        path, d = os.path.split(relative_dir)
2100        logging.debug(f"rmdir():  \tremote prefix dir: {path:s}")
2101        logging.debug(f"rmdir():  \tremote dir:  {d:s}")
2102
2103        try:
2104            # Add the remote root path and go to the remote directory.
2105            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2106            logging.debug(f"rmdir():  \tftp cd remote dir: {remote_dir:s}")
2107            self.ftp.cwd(remote_dir)
2108        except Exception as detail:
2109            logging.error(f"rmdir():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2110        else:
2111            try:
2112                logging.debug(f"rmdir():  \tftp rmd: {d:s}")
2113                self.ftp.rmd(d)
2114            except Exception as detail:
2115                logging.error(f"rmdir():  \tCannot ftp rmdir dir {d:s}: {str(detail):s}.  Directory is probably not empty.  Do a manual delete.")
2116
2117    def download(self, relative_file_path):
2118        """Download a binary file using ftp."""
2119
2120        logging.debug(f"download():  \tfile name: {relative_file_path:s}")
2121
2122        # Parse the relative file path into file name and relative directory.
2123        relative_dir, file_name = os.path.split(relative_file_path)
2124        logging.debug(f"download():  \tfile name: {file_name:s}")
2125        logging.debug(f"download():  \trelative dir: {relative_dir:s}")
2126        logging.debug(f"download():  \troot dir: {self.ftp_root_dir:s}")
2127
2128        # Add the remote root path and go to the remote directory.
2129        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2130        logging.debug(f"download():  \tftp cd remote dir: {remote_dir:s}")
2131
2132        try:
2133            self.ftp.cwd(remote_dir)
2134        except Exception as detail:
2135            logging.error(f"download():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2136        else:
2137            # Add the local root path to get the local file name.
2138            # Open local binary file to write into.
2139            local_file_name = self.append_root_dir(
2140                self.local_root_dir, relative_file_path)
2141            logging.debug(f"download():  \topen local file name: {local_file_name:s}")
2142            try:
2143                f = open(local_file_name, "wb")
2144                try:
                    # Calls f.write() on each block of the binary file.
                    # Downloading is currently disabled;  uncomment the next line to enable it.
                    # self.ftp.retrbinary("RETR " + file_name, f.write)
                    pass
2148                except Exception as detail:
                    logging.error(f"download():  \tCannot ftp retrbinary: {str(detail):s}")
2150                f.close()
2151            except IOError as detail:
                logging.error(f"download():  \tCannot open local file {local_file_name:s} for writing:  {str(detail):s}")
2153
    def upload(self, relative_file_path):
        """Upload a binary file using ftp."""
2156
2157        logging.debug(f"upload():  \trelative file path name: {relative_file_path:s}")
2158
2159        # Parse the relative file path into file name and relative directory.
2160        relative_dir, file_name = os.path.split(relative_file_path)
2161        logging.debug(f"upload():  \tfile name: {file_name:s}")
2162        logging.debug(f"upload():  \trelative dir: {relative_dir:s}")
2163        logging.debug(f"upload():  \tremote root dir: {self.ftp_root_dir:s}")
2164
2165        # Add the remote root path and go to the remote directory.
2166        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2167        logging.debug(f"upload():  \tftp cd remote dir: {remote_dir:s}")
2168
2169        try:
2170            self.ftp.cwd(remote_dir)
2171        except Exception as detail:
2172            logging.error(f"upload():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2173        else:
2174            # Add the local root path to get the local file name.
2175            # Open local binary file to read from.
2176            local_file_name = self.append_root_dir(
2177                self.local_root_dir, relative_file_path)
2178            logging.debug(f"upload():  \topen local file name: {local_file_name:s}")
2179
2180            try:
2181                f = open(local_file_name, "rb")
2182                try:
2183                    # f.read() is called on each block of the binary file until
2184                    # EOF.
2185                    logging.debug(f"upload():  \tftp STOR file {file_name:s}")
2186                    self.ftp.storbinary("STOR " + file_name, f)
2187                except Exception as detail:
2188                    logging.error(f"upload():  \tCannot ftp storbinary: {str(detail):s}")
2189                f.close()
2190            except IOError as detail:
2191                logging.error(f"upload():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2192
2193    def finish(self):
2194        """Log out of an ftp session"""
2195        logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
2196        try:
2197            self.ftp.quit()
2198        except Exception as detail:
2199            logging.error(f"Cannot ftp quit because {str(detail):s}")
2200
2201# ----------------------------------------------------------------------------
2202#  Main function
2203# ----------------------------------------------------------------------------
2204
2205def main(raw_args=None):
2206    """Main program.  Clean up and update my website."""
2207
2208    # Print the obligatory legal notice.
2209    print("""
2210    updateweb Version 7.3 - A Python utility program which maintains my web site.
2211    Copyright (C) 2007-2025 by Sean Erik O'Connor.  All Rights Reserved.
2212
2213    It deletes temporary files, rewrites old copyright lines and email address
2214    lines in source files, then synchronizes all changes to my web sites.
2215
2216    updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
2217    GNU General Public License.  This is free software, and you are welcome
2218    to redistribute it under certain conditions; see the GNU General Public
2219    License for details.
2220    """)
2221
2222    # Put ALL the main code into a try block!
2223    try:
2224        # ---------------------------------------------------------------------
2225        #  Load default settings and start logging.
2226        # ---------------------------------------------------------------------
2227
2228        # Default user settings.
2229        user_settings = UserSettings()
2230
        print(f"Running main( {raw_args} ) Python version "
              f"{sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d} "
              f"local web directory {user_settings.local_root_dir}\n")
2235        # Get command line options such as --verbose.  Pass them back as flags in
2236        # user_settings.
2237        CommandLineSettings(user_settings, raw_args)
2238
2239        # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
2240        if user_settings.UNITTEST:
2241            suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
2242            unittest.TextTestRunner(verbosity=2).run(suite)
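            # A rough equivalent (not used here, only a sketch) would be to let unittest
            # drive the run itself, e.g.
            #     unittest.main(argv=["updateweb"], exit=False, verbosity=2)
            # but loading the suite explicitly keeps argument parsing under our own control.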
2243            # We are done!
2244            print("  ...done!", flush=True)
2245            return
2246
2247        # Start logging to file.  Verbose turns on logging for
2248        # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels,
2249        # otherwise we log only WARNING, ERROR, and CRITICAL levels.
2250        if user_settings.VERBOSE:
2251            loglevel = logging.DEBUG
2252        else:
2253            loglevel = logging.WARNING
2254
2255        # Pick the log file name on the host.
2256        if user_settings.CLEAN:
2257            user_settings.LOGFILENAME = "/private/logLocal.txt"
2258        else:
2259            user_settings.LOGFILENAME = "/private/logRemote.txt"
2260
        logging.basicConfig(
            level=loglevel,
            format='%(asctime)s %(levelname)-8s %(message)s',
            datefmt='%a, %d %b %Y %H:%M:%S',
            filename=user_settings.local_root_dir + user_settings.LOGFILENAME,
            filemode='w')

        logging.debug("********** Begin logging")

        # Default is to skip processing or uploading MathJax files in /mathjax to the server.
        # Note that logging.basicConfig() above must run before any logging call below;
        # otherwise the first call would install a default handler and the file
        # configuration would be silently ignored.
        if not user_settings.MATHJAX:
            user_settings.DIR_TO_SKIP += "|mathjax"
        else:
            print("Processing and uploading MathJax files.  git restore any changed files and git clean -f to remove extra files...  ", end='', flush=True)
            print("Since you are loading MathJax for the first time, also remember to upload .htaccess manually.  If using FileZilla, change your FreeServer settings:  Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box.  This avoids ERROR 421 Too many connections...  ", end='', flush=True)
            logging.debug("Processing and uploading MathJax files.  git restore any changed files and git clean -f to remove extra files.")
            logging.debug("Since you are loading MathJax for the first time, also remember to upload .htaccess manually.  If using FileZilla, change your FreeServer settings:  Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box.  This avoids ERROR 421 Too many connections.")
2278
2279        # ---------------------------------------------------------------------
2280        #  Scan the local website, finding out all files and directories.
2281        # ---------------------------------------------------------------------
2282
2283        # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
2284        print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}...  ", end='', flush=True)
2285        logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")
2286
2287        local = LocalWebSite(user_settings)
2288        local.scan()
2289
2290        # ---------------------------------------------------------------------
2291        # Clean up local website.
2292        # ---------------------------------------------------------------------
2293
2294        # Clean up the directory by rewriting source code and hypertext and removing temporary files.
2295        print("Cleaning local web site...  ", end='', flush=True)
2296        logging.debug("========================== Cleaning the local web site")
2297        local.clean()
2298
2299        # We are done with the first scan of the local web site and will dispose of it.
2300        local.finish()
2301        del local
2302
2303        # ---------------------------------------------------------------------
2304        #  Rescan the local website since there will be changes to source
2305        #  files from the clean up stage.
2306        # ---------------------------------------------------------------------
2307
2308        print(f"Rescan the local web site from root dir = {user_settings.local_root_dir}", end='', flush=True)
2309        logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")
2310
2311        local = LocalWebSite(user_settings)
2312
2313        local.scan()
2314
2315        # ---------------------------------------------------------------------
2316        #  List all the local directories and files and their sizes.
2317        # ---------------------------------------------------------------------
2318
2319        # Local website directories.
2320        local_directory_list = local.directories
2321        logging.debug("********** List of all the Local Directories")
2322        for d in local_directory_list:
2323            logging.debug(f"\t {d:s}")
2324
        # Generate lists of the local website filenames only, and their sizes in bytes.
2326        local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
2327        total_number_of_files = len( local_files_name_size_pairs )
2328        logging.debug(f"********** List of all the Local Files from largest to smallest.  There are {total_number_of_files:15d} files.")
2329        local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)
2330
2331        # Local website filenames only, and their dates and times.
2332        local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME],file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
2333        logging.debug(f"********** List of all Local Files Showing Their Date and Time")
2334        for file_datetime_pair in local_file_datetime_pairs:
2335            logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")
2336
2337        # Total number of bytes in the local files.
2338        total_number_of_bytes = 0
2339        for file_size_pair in local_files_name_size_pairs:
2340            logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
2341            total_number_of_bytes += file_size_pair[1]
2342        logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")
2343
2344        local.finish()
2345
2346        if user_settings.CLEAN:
2347            logging.debug("========================== Done with local file and directory cleanup...")
2348            del local
2349            print("...done!", flush=True)
2350            return
2351
2352        # ---------------------------------------------------------------------
2353        #  Scan the remote hosted web site.
2354        # ---------------------------------------------------------------------
2355
2356        print("Scanning remote web site...", end='', flush=True)
2357        logging.debug("========================== Scanning the remote web site...")
2358
2359        # Pick which website to update.
2360        logging.debug("Connecting to primary remote site.")
2361        remote = RemoteWebSite(user_settings)
2362        remote.scan()
2363        remote.finish()
2364
2365        # ---------------------------------------------------------------------
2366        #  List all the remote server directories and files and their sizes.
2367        # ---------------------------------------------------------------------
2368
2369        remote_directory_list = remote.directories
2370        logging.debug("********** Remote Directories")
2371        for d in remote_directory_list:
2372            logging.debug(f"\t {d:s}")
2373
        # Remote website filenames only, and their sizes in bytes.
2375        remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
2376        total_number_of_files = len( remote_files_name_size_list )
2377        logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
2378        remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
2379        total_number_of_bytes = 0
2380        for file_size in remote_files_name_size_list:
2381            logging.debug(f"\t {file_size[1]:10d} bytes {file_size[0]:s}")
2382            total_number_of_bytes += file_size[1]
2383        logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")
2384
2385        # ---------------------------------------------------------------------
2386        # Synchronize the local and remote web sites.
2387        # ---------------------------------------------------------------------
2388
2389        print("Synchronizing remote and local web sites...", end='', flush=True)
2390        logging.debug("========================= Synchronizing remote and local web sites...")
2391
2392        # Primary website.
2393        logging.debug("Connecting to primary remote site for synchronization.")
2394        sync = UpdateWeb(user_settings,
2395                         local.directories,
2396                         local.files,
2397                         remote.directories,
2398                         remote.files)
2399
2400        sync.update()
2401        sync.finish()
2402
2403        del sync
2404        del remote
2405        del local
2406        print("...done!", flush=True)
2407
2408    except UpdateWebException as detail:
2409        logging.error(f"Couldn't update the web directory:  {str(detail):s}.  Aborting...")
2410
2411    except RecursionError as detail:
2412        logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d} You can increase it with sys.setrecursionlimit(limit) {str(detail):s}.  Aborting...")
2413
2414if __name__ == '__main__':
2415    """Python executes all code in this file.  Finally, we come here.  
2416
2417    * If we are executing this file as a standalone Python script, 
2418      the name of the current module is set to __main__ and thus we'll call the main() function.
2419
2420    * But if we are importing this code as a module, and calling it from another script, we will do this instead:
2421
2422        import updateweb
2423        updateweb.main(["--test"])"""
2424
2425    main()