1#!/usr/bin/env python3
   2# ============================================================================
   3#
   4# NAME
   5#
   6#     updateweb.py
   7#
   8# DESCRIPTION
   9#
  10#     Python script which updates my web sites.
  11#
  12#     It does miscellaneous cleanup on my local copy of the web site on disk,
  13#     including updating copyright information, then synchronizes the local
  14#     copy to my remote server web sites using FTP.
  15#
  16# USAGE
  17#
  18#     It's best to use the associated makefile.
  19#     But you can call this Python utility from the command line,
  20#
  21#     $ python updateweb.py          Clean up my local copy, then use it
  22#                                    to update my remote web server site.
  23#                                    Log warnings and errors.
  24#     $ python updateweb.py -v       Same, but log debug messages also.
  25#     $ python updateweb.py -c       Clean up my local copy only.
  26#     $ python updateweb.py -t       Run unit tests only.
  27#     $ python updateweb.py -m       Upload MathJax files (only need to do this once).
  28#
  29#     We get username and password information from the file PARAMETERS_FILE.
  30#
  31#     Logs are written to the files,
  32#
  33#         logLocal.txt       Local web site cleanup log.
  34#         logRemote.txt      Remote web server update log.
  35#
  36# AUTHOR
  37#
  38#     Sean E. O'Connor        23 Aug 2007  Version 1.0 released.
  39#
  40# LEGAL
  41#
  42#     updateweb.py Version 7.2 - A Python utility program which maintains my web site.
  43#     Copyright (C) 2007-2025 by Sean Erik O'Connor.  All Rights Reserved.
  44#
  45#     This program is free software: you can redistribute it and/or modify
  46#     it under the terms of the GNU General Public License as published by
  47#     the Free Software Foundation, either version 3 of the License, or
  48#     (at your option) any later version.
  49#
  50#     This program is distributed in the hope that it will be useful,
  51#     but WITHOUT ANY WARRANTY; without even the implied warranty of
  52#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  53#     GNU General Public License for more details.
  54#
  55#     You should have received a copy of the GNU General Public License
  56#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
  57#
  58#     The author's address is seanerikoconnor!AT!gmail!DOT!com
  59#     with !DOT! replaced by . and the !AT! replaced by @
  60#
  61# NOTES
  62#
  63#    DOCUMENTATION
  64#
  65#    Python interpreter:               https://www.python.org/
  66#    Python tutorial and reference:    https://docs.python.org/lib/lib.html
  67#    Python debugger:                  https://docs.python.org/3/library/pdb.html
  68#    Python regular expression howto:  https://docs.python.org/3.7/howto/regex.html
  69#
  70# ============================================================================
  71
  72# ----------------------------------------------------------------------------
  73#  Load Python Packages
  74# ----------------------------------------------------------------------------
  75
  76# OS stuff
  77import sys
  78import os
  79import argparse
  80import subprocess
  81import shutil
  82from pathlib import Path
  83
  84# Regular expressions
  85import re
  86
  87# FTP stuff
  88import ftplib
  89
  90# Date and time
  91import time
  92import stat
  93import datetime
  94
  95# Logging
  96import logging
  97
  98# Unit testing
  99import unittest
 100
 101# Enumerated types (v3.4)
 102from enum import Enum
 103from typing import List, Any
 104
 105# YAML configuration files (a superset of JSON!)
 106import yaml 
 107# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
 108try:
 109    from yaml import CLoader as Loader
 110except ImportError:
 111    from yaml import Loader
 112
 113# Python syntax highlighter.  See https://pygments.org
 114from pygments import highlight
 115from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
 116from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
 117from pygments.formatters import HtmlFormatter
 118
 119
 120# ----------------------------------------------------------------------------
 121#  Custom Top Level Exceptions.
 122# ----------------------------------------------------------------------------
 123
 124class UpdateWebException(Exception):
 125    """Something went wrong at a deep level when searching local files, searching remote files, or trying to sync local and remote, and we could not recover.
 126       Derive from Exception as recommended by Python manual"""
 127    pass
 128
 129# ----------------------------------------------------------------------------
 130#  User settings.
 131# ----------------------------------------------------------------------------
 132
 133class TreeWalkSettings(Enum):
 134    """Enum types for how to walk the directory tree."""
 135    BREADTH_FIRST_SEARCH = 1
 136    DEPTH_FIRST_SEARCH = 2
 137
 138class FileType(Enum):
 139    """'Enum' types for properties of directories and files."""
 140    DIRECTORY = 0
 141    FILE = 1
 142    ON_LOCAL_ONLY = 2
 143    ON_REMOTE_ONLY = 3
 144    ON_BOTH_LOCAL_AND_REMOTE = 4
 145
 146class UserSettings:
 147    """Megatons of user selectable settings."""
 148    # Logging control.
 149    LOGFILENAME = ""
 150    VERBOSE = False  # Verbose mode.  Prints out everything.
 151    CLEAN = False  # Clean the local website only.
 152    UNITTEST = False  # Run a unit test of a function.
 153    MATHJAX = False  # Process and upload MathJax files to server.
 154
 155    # When diving into the MathJax directory, web walking the deep directories
 156    # may exceed Python's default recursion limit of 1000.
 157    RECURSION_DEPTH = 5000
 158    sys.setrecursionlimit(RECURSION_DEPTH)
 159
 160    # Fields in the file information (file_info) structure.
 161    # For example, file_info = 
 162    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
 163    #      1,                                           -- Enum type: Is it a file? dir? on local? on remote? on both?
 164    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a datetime class.
 165    #      4675]                                        -- File size in bytes.
 166    FILE_NAME = 0
 167    FILE_TYPE = 1
 168    FILE_DATE_TIME = 2
 169    FILE_SIZE = 3
 170
 171    # Server settings.
 172    SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
 173    SERVER_NAME = None
 174    USER_NAME = None
 175    PASSWORD_NAME = None
 176    FTP_ROOT_NAME = None
 177    FILE_SIZE_LIMIT_NAME = None
 178
 179    # Map month names onto numbers.
 180    monthToNumber = {
 181        'Jan': 1,
 182        'Feb': 2,
 183        'Mar': 3,
 184        'Apr': 4,
 185        'May': 5,
 186        'Jun': 6,
 187        'Jul': 7,
 188        'Aug': 8,
 189        'Sep': 9,
 190        'Oct': 10,
 191        'Nov': 11,
 192        'Dec': 12}
 193
 194    # List of directories to skip over when processing or uploading the web page.
 195    # Some are private but most are dir of temporary files.
 196    # They will be listed as WARNING in the log.
 197    # Examples:
 198    #     My private admin settings directory.
 199    #     Git or SVN local admin directories.
 200    #     Compile build directories fromXCode.
 201    #     PyCharm build directories.
 202    #     Python cache directories.
 203    #     Jupyter checkpoint directories.
 204    #     XCode temporary file crap.
 205    DIR_TO_SKIP = "private|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|__pycache__|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"
 206
 207    # List of files to skip when processing or uploading to the web page.
 208    # They will be listed as WARNING in the log.
 209    # Examples:
 210    #     MathJax yml file.
 211    #     .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
 212    FILE_TO_SKIP = ".travis.yml|.svnignore|.htaccess"
 213
 214    # Suffixes for temporary files which will be deleted during the cleanup
 215    # phase.
 216    TEMP_FILE_SUFFIXES = r"""        # Use Python raw strings.
 217        \.                           # Match the dot in the file name.
 218                                     # Now begin matching the file name suffix.
 219                                     # (?: non-capturing match for the regex inside the parentheses,
 220                                     #   i.e. matching string cannot be retrieved later.
 221                                     # Now match any of the following file extensions:
 222        (?: o   | obj | lib |        #     Object files generated by C, C++, etc compilers
 223                              pyc |  #     Object file generated by the Python compiler
 224                  ilk | pdb | sup |  #     Temp files from VC++ compiler
 225            idb | ncb | opt | plg |  #     Temp files from VC++ compiler
 226            sbr | bsc | map | bce |  #     Temp files from VC++ compiler
 227            res | aps | dep | db  |  #     Temp files from VC++ compiler
 228                              jbf |  #     Paintshop Pro
 229                      class | jar |  #     Java compiler
 230                              fas |  #     CLISP compiler
 231                        swp | swo |  #     Vim editor
 232                        toc | aux |  #     TeX auxilliary files (not .synctex.gz or .log)
 233          DS_Store  | _\.DS_Store |  #     macOS finder folder settings.
 234                       _\.Trashes |  #     macOS recycle bin
 235        gdb_history)                 #     GDB history
 236        $                            #     Now we should see only the end of line.
 237        """
 238
 239    # Special case:  Vim temporary files contain a twiddle anywhere in the
 240    # name.
 241    VIM_TEMP_FILE_EXT = "~"
 242
 243    # Suffixes for temporary directories which should be deleted during the
 244    # cleanup phase.
 245    TEMP_DIR_SUFFIX = r"""           # Use Python raw strings.
 246        (?: Debug | Release |        # C++ compiler
 247           ipch   | \.vs    |        # Temp directories from VC++ compiler
 248        \.Trashes | \.Trash)         # macOS recycle bin
 249        $
 250        """
 251
 252    # File extension for an internally created temporary file.
 253    TEMP_FILE_EXT = ".new"
 254
 255    # Identify source file types.
 256    HYPERTEXT_FILE_PATTERN = r"""  # Use Python raw strings.
 257        (\.                        # Match the filename suffix after the .
 258            (?: html | htm |       # HTML hypertext
 259                css)               # CSS style sheet
 260        $)                         # End of line.
 261    """
 262
 263    SOURCE_FILE_PATTERN = r"""      # Use Python raw strings.
 264        (?: makefile$ |             # Any file called makefile is a source file.
 265                                    # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
 266          .vimrc$ |                 # Vim script
 267          (.bashrc$ |               # Bash configuration files.
 268           .bash_profile$ |
 269           .bash_logout$) 
 270          |
 271          (.gitignore$ |             # Git configuration files.
 272           .gitignore_global$ | 
 273           .gitconfig$)
 274          |
 275          (\.                       # Match the filename suffix after the .
 276                                    # Now match any of these suffixes:
 277             (?: 
 278                  c | cpp | h | hpp |   #     C++ and C
 279                  js |                  #     Javascript
 280                  py |                  #     Python
 281                  lsp |                 #     LISP
 282                  ipynb |               #     Jupyter notebook
 283                  m  |                  #     MATLAB
 284                  FOR | for | f |       #     FORTRAN
 285                  yaml |                #     YAML = JSON superset
 286                  tex |                 #     LaTeX
 287                  txt | dat |           #     Data files
 288                  sh)                   #     Bash
 289             $)                         # End of line.
 290         )
 291         """
 292
 293    # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
 294    SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
 295        (?: ^life\.html$          | # We want a listing of this particular HTML file.
 296            ^index\.html$         | # I want to list my top level HTML file.  (There is only one file with this name at the top level web directory.)
 297            ^webPageDesign\.html$ | # and also this HTML example file, but no others.
 298            ^StyleSheet\.css$ )     # I want to list my style sheet.
 299        """
 300
 301    # Files for which we want to generate a syntax highlighted source code listing.
 302    # Uses an f-string combined with a raw-string.
 303    FILE_TO_HIGHLIGHT_PATTERN = fr"""
 304        (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} | 
 305            {SOURCE_FILE_PATTERN} )
 306        """
 307
 308    # Update my email address.
 309    # This is tricky:  Prevent matching and updating the name within in this
 310    # Python source file by using the character class brackets.
 311    OLD_EMAIL_ADDRESS = r"""
 312        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
 313        """
 314    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"
 315
 316    # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.  
 317    # Read patterns and strings from the updateweb.yaml file.
 318    STRING_REPLACEMENT_LIST = []
 319    # Pairs of test strings and their correct match/replacements.
 320    STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []
 321
 322    # Change all old software version lines of the form
 323    #      Primpoly Version nnnn.nnnn
 324    # to the new software version.
 325    # Note that since we are using raw strings leading and trailing whitespace
 326    # is ignored in both pattern and replacement.
 327    CURRENT_SOFTWARE_VERSION = r"""
 328        Primpoly
 329        \s+
 330        Version
 331        \s+
 332        ([0-9]+)   # The two part version number NNN.nnn
 333        \.
 334        ([0-9]+)
 335        """
 336    NEW_SOFTWARE_VERSION = r"""
 337        Primpoly Version 16.3
 338        """
 339
 340    # Match a copyright line.  Then extract the copyright symbol which can be
 341    # ascii (C) or HTML &copy; and extract the old year.
 342    TWO_DIGIT_YEAR_FORMAT = "%02d"
 343    COPYRIGHT_LINE = r"""
 344        Copyright                       # Copyright.
 345        \s+                             # One or more spaces.
 346        (?P<symbol> \(C\) | &copy;)     # Match and extract the copyright symbol.
 347        \D+                             # Any non-digits.
 348        (?P<old_year>[0-9]+)            # Match and extract the old copyright year,
 349                                        # then place it into variable 'old_year'
 350        -                               # to
 351        ([0-9]+)                        # New copyright year.
 352        """
 353
 354    # Match a line containing the words,
 355    #    last updated YY
 356    # and extract the two digit year YY.
 357    LAST_UPDATED_LINE = r"""
 358        last\s+         # Match the words "last updated"
 359        updated\s+
 360        \d+             # Day number
 361        \s+             # One or more blanks or tab(
 362        [A-Za-z]+       # Month
 363        \s+             # One or more blanks or tabs
 364        (?P<year>\d+)   # Two digit year.  Place it into the variable 'year'
 365        """
 366
 367    # Web server root directory.
 368    DEFAULT_ROOT_DIR = "/"
 369
 370    # The ftp listing occasionally shows a date newer than the actual date. 
 371    # On my server, it could be 6 months newer when we are near New Year's Day.  Typically the server file time is only a 1 or 2 minutes newer.
 372    # But if the remote file time is much newer, it might be an old file with a bad date/time.  
 373    # Upload the file to be safe.
 374    # How to see the time differences from the log if they are large:
 375    #     egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
 376    #     Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
 377    # How to see the time differences from the log if they are small and we wait and NOT upload:
 378    #    egrep -o "Remote file.*is newer.*days" logRemote.txt
 379    #    Remote file error404.html is newer by    102.0 seconds =      1.7 minutes =      0.0 hours =      0.0 days
 380    #    Remote file index.html is newer by    113.0 seconds =      1.9 minutes =      0.0 hours =      0.0 days
 381    MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0
 382
 383    # Upload only if we are newer by more than a few minutes.  Allows for a little slop in time stamps on server or host.
 384    MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0
 385
 386    # An ftp list command line should be at least this many chars, or we'll
 387    # suspect and error.
 388    MIN_FTP_LINE_LENGTH = 7
 389
 390    # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
 391    # ftp listings are generally similar to UNIX ls -l listings.
 392    #
 393    # Some examples:
 394    #
 395    # (1) Freeservers ftp listing,
 396    #
 397    #          0        1   2                3           4    5   6   7      8
 398    #     drwxr-xr-x    3 1000             1000         4096 Nov 18  2006 Electronics
 399    #     -rw-r--r--    1 1000             1000        21984 Jun  4 03:46 StyleSheet.css
 400    #     -rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html
 401    #
 402    # (2) atspace ftp listing,
 403    #
 404    #     drwxr-xr-x    3  seanerikoconnor vusers         49 Apr  7  2006 Electronics
 405    #     -rw-r--r--    1  seanerikoconnor vusers      21984 Jun  4 04:03 StyleSheet.css
 406    #
 407    FTP_LISTING = r"""
 408        [drwx-]+            # Unix type file mode.
 409        \s+                 # One or more blanks or tabs.
 410        \d+                 # Number of links.
 411        \s+
 412        \w+                 # Owner.
 413        \s+
 414        \w+                 # Group.
 415        \s+
 416        (?P<bytes> \d+)     # File size in bytes, placed into the variable 'bytes'.
 417        \s+
 418        (?P<mon> \w+)       # Month modified, placed into the variable 'mon'.
 419        \s+
 420        (?P<day> \d+)       # Day modified, placed into the variable 'day'.
 421        \s+
 422        (
 423            (?P<hour> \d+)  # Hour modified, placed into the variable 'hour'.
 424            :
 425            (?P<min> \d+)   # Minute modified, placed into the variable 'min'.
 426            |
 427            (?P<year> \d+)  # If hours and minutes are absent (happens when year is not the current year),
 428                            # extract the year instead.
 429        )
 430        \s+
 431        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)    # Path and file name containing letters, numbers,
 432                                                    # and funny characters.  We must escape some of
 433                                                    # these characters with a backslash, \.
 434        """
 435
 436    # HTML header up to the style sheet.
 437    BASIC_HTML_BEGIN = \
 438        """
 439        <!DOCTYPE html>
 440        <html lang="en-US">  <!-- Set language of this page to USA English. -->
 441        
 442        <head>
 443            <!-- This page uses Unicode characters. -->
 444            <meta charset="utf-8">
 445        
 446            <!-- Set viewport to actual device width.  Any other settings makes the web page initially appear zoomed-in on mobile devices. -->
 447            <meta name="viewport" content="width=device-width, initial-scale=1">
 448        
 449            <!-- Title appears in the web browser tab for this page.  The browser also uses it to bookmark this page. -->
 450            <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>
 451        
 452            <!-- Search engines will search using words in this description.  They will also display title in their search results. -->
 453            <meta name="description" content="Syntax Colored Source Code Listing">
 454        
 455            <!-- Some content management software uses the author's name. -->
 456            <meta name="author" content="Sean Erik O'Connor">
 457        
 458            <meta name="copyright" content="Copyright (C) 1986-2025 by Sean Erik O'Connor.  All Rights Reserved.">   
 459        
 460            <!-- Begin style sheet insertion -->
 461            <style>
 462                /* Default settings for all my main web pages. */
 463                body
 464                {
 465                    /* A wide sans-serif font is more readable on the web. */
 466                    font-family:            Verdana, Geneva, "Trebuchet MS", sans-serif ;
 467        
 468                    /* Set the body font size a little smaller than the user's default browser setting. */
 469                    font-size:              0.8em ; 
 470        
 471                    /* Black text is easier to read. */
 472                    color:                  black ;
 473        
 474                    /*  More vertical space between lines for more pleasant reading.  Use a unitless font height multiplier.  
 475                        Length and percentage percentage values can give scrunched text due to poor inheritance behavior. */
 476                    line-height:            1.7 ;
 477                }
 478        
 479                <!-- Now prepare to add the syntax coloring style sheet from Pygment -->
 480        """
 481
 482    # After the style sheet and up to the start of the article in the body.
 483    BASIC_HTML_MIDDLE = \
 484        """
 485            </style>
 486        </head>
 487        
 488        <body>
 489            <article class="content">
 490        """
 491
 492    # After the source code listing, finish the article, body and html document.
 493    BASIC_HTML_END = \
 494        """
 495            </article>
 496        </body>
 497        
 498        </html>
 499        """
 500
 501    def __init__(self):
 502        """Set up the user settings."""
 503
 504        self.local_root_dir = ""
 505
 506        # Import the user settings from the parameter file.
 507        self.get_local_root_dir()
 508        self.get_server_settings()
 509
 510        self.precompile_regular_expressions()
 511
 512    def get_server_settings(self):
 513        """
 514        Read web account private settings from a secret offline parameter file.
 515        These also hold patterns to match and replace in all of our source pages.
 516        """
 517
 518        # Private file which contains my account settings.
 519        settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
 520        # Recommended by
 521        #  https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
 522        try:
 523            stream = open(settings_file_name, "r")
 524        except OSError as detail:
 525            logging.error(f"Cannot open the YAML file {settings_file_name:s}.  Unable to read the settings because: {str(detail):s}")
 526            # Rethrow the exception higher.
 527            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ") from detail
 528        # Read all the YAML documents in the file.
 529        yaml_contents = yaml.load_all(stream, Loader)
 530        yaml_document_list: list[Any] = []
 531        for yaml_doc in yaml_contents:
 532            yaml_document_list.append(yaml_doc)
 533        num_yaml_docs = len(yaml_document_list)
 534        if num_yaml_docs != 2:
 535            logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file.  Aborting...")
 536            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ")
 537
 538        # Load all the server settings.
 539        self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
 540        self.USER_NAME = yaml_document_list[0]['ftp_user_name']
 541        self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
 542        self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
 543        self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])
 544
 545        # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
 546        self.STRING_REPLACEMENT_LIST = []
 547        pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
 548        for pat_rep in pat_rep_yaml_list:
 549            # Fetch the regular expression and compile it for speed.
 550            verbose_regex = pat_rep['pattern']
 551            pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
 552            # Since we use raw strings, we need to strip off leading and trailing whitespace.
 553            replacement_string = pat_rep['replacement_string'].strip().lstrip()
 554            self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])
 555
 556        # Load the test and verify strings.
 557        test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
 558        for test_verify_string in test_verify_strings_list:
 559            test_string = test_verify_string['test_string'].strip().lstrip()
 560            verify_string = test_verify_string['verify_string'].strip().lstrip()
 561            self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string,verify_string])
 562
 563        print("  ...done!", flush=True)
 564        return
 565
 566    def get_local_root_dir(self):
 567        """Get the local website root directory on this platform."""
 568
 569        # Each platform has a definite directory for the web page.
 570        local_web_dir_path = "/Desktop/Sean/WebSite"
 571
 572        if sys.platform.startswith('darwin'):
 573            self.local_root_dir = str(Path.home()) + local_web_dir_path
 574        # My Cyperpower PC running Ubuntu Linux.
 575        elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
 576            self.local_root_dir = str(Path.home()) + local_web_dir_path
 577        return
 578
 579    def precompile_regular_expressions(self):
 580        """For speed precompile the regular expression search patterns."""
 581        self.COPYRIGHT_LINE            = re.compile(self.COPYRIGHT_LINE,            re.VERBOSE | re.IGNORECASE)
 582        self.CURRENT_SOFTWARE_VERSION  = re.compile(self.CURRENT_SOFTWARE_VERSION,  re.VERBOSE | re.IGNORECASE)
 583        self.FTP_LISTING               = re.compile(self.FTP_LISTING,               re.VERBOSE | re.IGNORECASE)
 584        self.TEMP_FILE_SUFFIXES        = re.compile(self.TEMP_FILE_SUFFIXES,        re.VERBOSE | re.IGNORECASE)
 585        self.TEMP_DIR_SUFFIX           = re.compile(self.TEMP_DIR_SUFFIX,           re.VERBOSE)
 586        self.SOURCE_FILE_PATTERN       = re.compile(self.SOURCE_FILE_PATTERN,       re.VERBOSE)
 587        self.HYPERTEXT_FILE_PATTERN    = re.compile(self.HYPERTEXT_FILE_PATTERN,    re.VERBOSE)
 588        self.OLD_EMAIL_ADDRESS         = re.compile(self.OLD_EMAIL_ADDRESS,         re.VERBOSE | re.IGNORECASE)
 589        self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
 590        self.LAST_UPDATED_LINE         = re.compile(self.LAST_UPDATED_LINE,         re.VERBOSE | re.IGNORECASE)
 591
 592# ----------------------------------------------------------------------------
 593#  Unit test individual functions.
 594# ----------------------------------------------------------------------------
 595
 596class UnitTest(unittest.TestCase):
 597    """Initialize the UnitTest class."""
 598    def setUp(self):
 599        self.user_settings = UserSettings()
 600        self.user_settings.get_local_root_dir()
 601
 602    def tearDown(self):
 603        """Clean up the UnitTest class."""
 604        self.user_settings = None
 605
 606    def test_copyright_updating(self):
 607        """Test copyright line updating to the current year."""
 608        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
 609        line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2025 by Sean Erik O'Connor"
 610        line_after_update_actual = "Copyright (C) 1999-2025 by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2025 by Sean Erik O'Connor"
 611        pat = self.user_settings.COPYRIGHT_LINE
 612        match = pat.search(line_before_update)
 613
 614        if match:
 615            old_year = int(match.group('old_year'))
 616            # Same as call to self.get_current_year():
 617            current_year = int(time.gmtime()[0])
 618            if old_year < current_year:
 619                # We matched and extracted the old copyright symbol into the variable
 620                # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
 621                # We now insert it back by placing the special syntax
 622                # \g<symbol> into the replacement string.
 623                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
 624                                str(current_year)
 625                line_after_update_computed = pat.sub(new_copyright, line_before_update)
 626                self.assertEqual(
 627                    line_after_update_actual,
 628                    line_after_update_computed,
 629                    f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")
 630            else:
 631                self.fail()
 632        else:
 633            self.fail()
 634
 635    def test_update_software_version(self):
 636        """Test updating to a new version of Primpoly."""
 637        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
 638        old_version_line = "|     Primpoly Version 00." + "0 - A Program for Computing Primitive Polynomials.|"
 639        new_version_line = "|     Primpoly Version 16." + "3 - A Program for Computing Primitive Polynomials.|"
 640        pat = self.user_settings.CURRENT_SOFTWARE_VERSION
 641        match = pat.search(old_version_line)
 642        if match:
 643            # Note that since we are using raw strings leading and trailing
 644            # whitespace is ignored.
 645            new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
 646            updated_version_line = pat.sub(new_version, old_version_line)
 647            self.assertEqual(updated_version_line, new_version_line, f"updated version line = {updated_version_line:s} new line = {new_version_line:s}")
 648        else:
 649            self.fail()
 650
 651    def test_extract_filename_from_ftp_listing(self):
 652        """Test parsing an FTP listing."""
 653        ftp_line = "-rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html"
 654        extracted_file_name = "allclasses-frame.html"
 655        pat = self.user_settings.FTP_LISTING
 656        match = pat.search(ftp_line)
 657        if match:
 658            filename = match.group('filename')
 659            self.assertEqual(
 660                filename,
 661                extracted_file_name,
 662                f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
 663        else:
 664            self.fail()
 665
 666    def test_get_file_time_and_date(self):
 667        """Test getting a file time and date."""
 668        # Point to an old file.
 669        file_name = "./Images/home.png"
 670        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 671        # Get the UTC time.
 672        file_epoch_time = os.path.getmtime(full_file_name)
 673        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 674        # Create a datetime object for the file.
 675        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 676        # Check if the file time matches what we would see if we did ls -l <file_name>
 677        computed = f"file {file_name:s} datetime {d.ctime():s}"
 678        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
 679        self.assertEqual(computed, actual)
 680
 681    def test_set_file_time_and_date(self):
 682        """Test setting a file time and date."""
 683        file_name = "./Images/home.png"
 684        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 685        # Create a temporary file in the same directory.
 686        temp_file_name = "temporal.tmp"
 687        full_temp_file_name = self.user_settings.local_root_dir + temp_file_name
 688        try:
 689            with open(full_temp_file_name, 'w') as fp:
 690                fp.write("The End of Eternity")
 691        except OSError as detail:
 692            logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s}  Aborting...")
 693            raise UpdateWebException("Failed the unit test for setting time and date of a file.  See the log file for details.  Aborting...") from detail
 694        # Get the old file time.  Set the temporary file to the same time.
 695        file_stat = os.stat(full_file_name)
 696        os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
 697        # What is the temporary file's time now?
 698        file_epoch_time = os.path.getmtime(full_temp_file_name)
 699        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 700        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 701        # Is the temporary file time set properly?
 702        computed = f"file {file_name:s} datetime {d.ctime():s}"
 703        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
 704        self.assertEqual(computed, actual)
 705        os.remove(full_temp_file_name)
 706
 707    def test_difference_of_time_and_date(self):
 708        """Test a date difference calculation."""
 709        file_name = "./Images/home.png"
 710        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 711        # Get the UTC time.
 712        file_epoch_time = os.path.getmtime(full_file_name)
 713        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 714        # Create a datetime object for the file.
 715        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 716        # Slightly change the date and time by adding 1 minute.
 717        d2 = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]+1)  # year, month, day, hour, minute, second
 718        time_delta = d2 - d
 719        seconds_different = time_delta.total_seconds()
 720        minutes_different = seconds_different / 60.0
 721        hours_different = minutes_different / 60.0
 722        days_different = hours_different / 24.0
 723        computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
 724        actual = "difference  0.00001 days,  0.00028 hours  0.01667 minutes,  1.00000 seconds"
 725        self.assertEqual(computed, actual)
 726
 727    def test_pattern_match_dir_to_skip(self):
 728        """Test if skipping certain named directories is recoginizing the dir names."""
 729        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
 730        pat = re.compile(self.user_settings.DIR_TO_SKIP)
 731        if pat.search(dir_skip):
 732            self.assertTrue(True)
 733        else:
 734            self.assertTrue(False)
 735
 736    def test_file_name_to_syntax_highlight(self):
 737        """Test if syntax highlighting recognizes file names to highlight."""
 738        file_name1 = "Computer/hello.lsp"
 739        file_name2 = "Computer/life.html"
 740        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
 741        if p.search(Path(file_name1).name) and p.search(Path(file_name2).name):
 742            self.assertTrue(True)
 743        else:
 744            self.assertTrue(False)
 745
 746    def test_user_settings(self):
 747        """Test whether user settings are correctly initialized."""
 748        computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
 749        actual = "File size limit = 50000 K"
 750        self.assertEqual(computed, actual, "File size limit settings are incorrect.")
 751
 752    def test_check_replace_substring(self,debug=True):
 753        """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
 754           For troubleshooting, turn on debug.
 755        """
 756        test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
 757        # Iterate over all test strings.
 758        for pair in test_verify_pairs:
 759            [test_string, verify_string] = pair
 760            if debug:
 761                print( f">>>>>>> next test string   = {test_string}")
 762                print( f">>>>>>> next verify string = {verify_string}")
 763            # Iterate over all patterns and replacements.
 764            for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
 765                [pat, rep_string] = match_replace_tuple
 766                print( f"\t-------> next pattern = {pat}") 
 767                print( f"\t-------> next replacement = {rep_string}") 
 768                match = pat.search(test_string)
 769                # The pattern match succeeds.
 770                if match:
 771                    try:
 772                        sub = pat.sub(rep_string, test_string)
 773                        # String replacement succeeds for this pattern/replace pair iteration.
 774                        if debug:
 775                            print( f"\t\t.......> match and replace: {test_string} ---> {sub}")
 776                        test_string = sub
 777                    except IndexError as detail:
 778                        print(f"\t\t.......> Caught an exception: {str(detail):s}.  Replacement failed.")
 779                        if debug:
 780                            self.assertTrue(False)
 781                elif debug:
 782                    print( f"\t\t.......> no match for pattern = {pat} in test string = {test_string}")
 783                # No match, so go on to the next pattern and don't change test_string.
 784            # Done with all pattern/replace on test string.
 785            # Check this test string in the list.
 786            self.assertEqual(test_string, verify_string, f"\ntest_string = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
 787            if debug:
 788                print( f"\t******* DONE with all pattern matches and replacements on this test/verify string pair.\n" )
 789
 790# ----------------------------------------------------------------------------
 791#  Command line options.
 792# ----------------------------------------------------------------------------
 793
 794class CommandLineSettings(object):
 795    """Get the command line options."""
 796
 797    def __init__(self, user_settings, raw_args=None):
 798        """Get command line options"""
 799        command_line_parser = argparse.ArgumentParser(
 800            description="updateweb options")
 801
 802        # Log all changes, not just warnings and errors.
 803        command_line_parser.add_argument(
 804            "-v",
 805            "--verbose",
 806            help="Turn on verbose mode to log everything",
 807            action="store_true")
 808
 809        # Clean up the local website only.
 810        command_line_parser.add_argument(
 811            "-c",
 812            "--clean",
 813            help="Do a cleanup on the local web site only.",
 814            action="store_true")
 815
 816        # Clean up the local website only.
 817        command_line_parser.add_argument(
 818            "-m",
 819            "--mathjax",
 820            help="""ALSO upload mathjax directory.\
 821            Do this if you have a new version of MathJax or if you've reset your server and deleted the /mathjax remote directory.\
 822            You'll need to do git restore on any altered files in the local branch and git clean -f to remove any newly created files.\
 823            NOTE:  If you did reset your server and delete all files, run the command    find . -name '*.*' -exec touch {} \\;    from the web page root directory.\
 824            Also run   find . -name '*' -exec touch {} \\;    This will ensure accurate times on the server.""",
 825            action="store_true")
 826
 827        # Run unit tests only.
 828        command_line_parser.add_argument("-t", "--test",
 829                                         help="Run unit tests.",
 830                                         action="store_true")
 831
 832        args = command_line_parser.parse_args(raw_args)
 833
 834        if args.verbose:
 835            user_settings.VERBOSE = True
 836        if args.clean:
 837            user_settings.CLEAN = True
 838        if args.test:
 839            user_settings.UNITTEST = True
 840        if args.mathjax:
 841            user_settings.MATHJAX = True
 842
 843# ----------------------------------------------------------------------------
 844#  Base class which describes my web site overall.
 845# ----------------------------------------------------------------------------
 846
class WebSite(object):
    """
    Abstract class used for analyzing both local and remote (ftp server) websites.
    Contains the web-walking functions which traverse the directory structures and files.
    These will be overloaded in the subclasses with differently specialized methods for either walking a disk drive directory with ls commands or else walking a remote directory with FTP commands.
    Child classes may define additional functions which only they need.
    """

    def __init__(self, settings):
        """Set up root directories.

        settings is the user-settings object;  it is kept as
        self.user_settings and consulted by all the walking methods below.
        """

        # Import the user settings.
        self.user_settings = settings

        # Queue keeps track of directories not yet processed.
        self.queue = []

        # List of all directories traversed, as root-relative names.
        self.directories = []

        # List of files traversed, with file information.
        self.files = []

        # Find out the root directory and go there.  Both methods are
        # overridden in the subclasses (local disk walk vs remote FTP walk).
        self.root_dir = self.get_root_dir()
        self.go_to_root_dir(self.root_dir)

    # This is a Python decorator which says get_current_year is a class function.  And so there is no self first argument, and you can call it without creating an
    # instance of this class.  Call it from anywhere, inside or outside the class, using WebSite.get_current_year().  You could just create a global function instead.)
    @staticmethod
    def get_current_year():
        """Get the current year (four digits, from UTC time)."""
        return int(time.gmtime()[0])

    @staticmethod
    def get_current_two_digit_year():
        """Get the last two digits of the current year."""
        return WebSite.get_current_year() % 100

    @staticmethod
    def is_file_info_type(file_info):
        """Check if we have a file information structure or merely a simple file name.

        A file information structure is a list [name, type, datetime, size];
        a bare file name is a str.  Any other type is an internal error and
        raises UpdateWebException.
        """
        try:
            if isinstance(file_info, list):
                return True
            elif isinstance(file_info, str):
                return False
            else:
                logging.error("is_file_info_type found a bad type.  Aborting...")
                raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ")
        except TypeError as detail:
            logging.error(f"is_file_info_type found a bad type {str(detail):s}.  Aborting...")
            raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ") from detail

    def get_root_dir(self):
        """Subclass:  Put code here to get the root directory"""
        return ""

    def go_to_root_dir(self, root_dir):
        """Subclass:  Put code here to go to the root directory"""
        pass  # Python's do-nothing statement.

    def one_level_down(self, d):
        """Subclass:  Fill in with a method which returns a list of the
        directories and files immediately beneath dir"""
        return [], []

    def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
        """Walk a directory in either depth first or breadth first order.  BFS is the default.

        Appends root-relative names to self.files and self.directories.
        NOTE(review):  this method recurses while all recursive calls share
        self.queue, so an inner call can drain directories queued by an outer
        call;  the statement order here is load-bearing.
        """

        # Get all subfiles and subdirectories off this node.
        subdirectories, subfiles = self.one_level_down(d)

        # Add all the subfiles in order.
        for f in subfiles:

            name = self.strip_root(f)
            logging.debug(f"Webwalking:  Adding file {name[self.user_settings.FILE_NAME]:s} to list.")

            # Some files are private so skip them from consideration.
            pat = re.compile(self.user_settings.FILE_TO_SKIP)

            if pat.search(name[self.user_settings.FILE_NAME]):
                logging.warning(
                    f"Webwalking:  Skipping private file {name[self.user_settings.FILE_NAME]:s}")
            # Don't upload the log file due to file locking problems.
            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
                logging.debug(f"Webwalking:  Skipping log file {name[self.user_settings.FILE_NAME]:s}")
            # File size limit on some servers.
            else:
                self.files.append(name)

        # Queue up the subdirectories.
        for d in subdirectories:
            # Some directories are private such as .git or just temporary file
            # caches so skip them from consideration.
            pat = re.compile(self.user_settings.DIR_TO_SKIP)
            if pat.search(d):
                logging.warning(f"Webwalking:  Skipping private dir {d:s}")
            else:
                logging.debug(f"Webwalking:  Pushing dir {d:s} on the queue.")
                self.queue.append(d)

        # Search through the directories.
        while len(self.queue) > 0:
            # For breadth first search, remove from beginning of queue.
            if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
                d = self.queue.pop(0)

            # For depth first search, remove from end of queue.
            elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
                d = self.queue.pop()
            else:
                # Unknown search type:  fall back to breadth first.
                d = self.queue.pop(0)

            name = self.strip_root(d)
            logging.debug(f"Webwalking:  Adding relative directory {name:s} to list, full path = {d:s}.")
            self.directories.append(name)

            self.walk(d)

    def strip_root(self, file_info):
        """Return a path, but strip off the root directory.

        file_info may be a bare file name (str) or a file information list;
        the same kind of value is returned with only the name stripped.
        """

        root = self.root_dir

        # Extract the file name.
        if self.is_file_info_type(file_info):
            name = file_info[self.user_settings.FILE_NAME]
        else:
            name = file_info

        # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
        # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
        # Art/foo.txt
        lenroot = len(root)
        if root == self.user_settings.DEFAULT_ROOT_DIR:
            pass
        else:
            # Also strip the '/' separator that follows a non-default root.
            lenroot = lenroot + 1

        stripped_path = name[lenroot:]

        if self.is_file_info_type(file_info):
            # Update the file name only;  keep type, date/time and size fields.
            return [stripped_path,
                    file_info[self.user_settings.FILE_TYPE],
                    file_info[self.user_settings.FILE_DATE_TIME],
                    file_info[self.user_settings.FILE_SIZE]]
        else:
            return stripped_path

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a path"""

        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def scan(self):
        """Scan the directory tree recursively from the root"""
        logging.debug(f"Webwalking:  Beginning recursive directory scan from root directory {self.root_dir:s}")
        self.walk(self.root_dir)

    def modtime(self, f):
        """Subclass:  Get file modification time"""
        pass

    def finish(self):
        """Quit web site.  Subclasses release their resources (e.g. FTP connections) here."""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
        pass
1022
1023# ----------------------------------------------------------------------------
1024#  Subclass which knows about the local web site on disk.
1025# ----------------------------------------------------------------------------
1026
1027class LocalWebSite(WebSite):
1028    """Walk the local web directory on local disk down from the root.
1029    Clean up temporary files and do other cleanup work."""
1030
1031    def __init__(self, settings):
1032        """Go to web page root and list all files and directories."""
1033
1034        # Initialize the parent class.
1035        WebSite.__init__(self, settings)
1036
1037        self.root_dir = self.get_root_dir()
1038        logging.debug(f"LocalWebSite.__init__():  \tRoot directory: {self.root_dir:s}")
1039
1040    def get_root_dir(self):
1041        """Get the name of the root directory"""
1042        return self.user_settings.local_root_dir
1043
1044    def go_to_root_dir(self, root_dir):
1045        """Go to the root directory"""
1046
1047        # Go to the root directory.
1048        logging.debug(f"LocalWebSite.go_to_root_dir():  \tchdir to root directory:  {root_dir:s}")
1049        os.chdir(root_dir)
1050
1051        # Read it back.
1052        self.root_dir = os.getcwd()
1053        logging.debug(f"LocalWebSite.go_to_root_dir():  \tgetcwd root directory:  {self.root_dir:s}")
1054
1055    def one_level_down(self, d):
1056        """List all files and subdirectories in the current directory, dir.  For files, collect file info
1057        such as time, date and size."""
1058
1059        directories = []
1060        files = []
1061
1062        # Change to current directory.
1063        os.chdir(d)
1064
1065        # List all subdirectories and files.
1066        dir_list = os.listdir(d)
1067
1068        if dir_list:
1069            for line in dir_list:
1070                # Add the full path prefix from the root.
1071                name = self.append_root_dir(d, line)
1072                logging.debug(f"LocalWebSite.one_level_down():  \tlocal dir or file {name:s}")
1073
1074                # Is it a directory or a file?
1075                if os.path.isdir(name):
1076                    directories.append(name)
1077                elif os.path.isfile(name):
1078                    # First assemble the file information of name, time/date and size into a list.
1079                    # Can index it like an array.  For example,
1080                    # file_info = 
1081                    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
1082                    #      1,                                           -- Enum type FileType.FILE = 1.
1083                    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a date/time class.
1084                    #      4675]                                        -- File size in bytes.
1085                    file_info = [name,
1086                                 FileType.FILE,
1087                                 self.get_file_date_time(name),
1088                                 self.get_file_size(name)]
1089                    files.append(file_info)
1090
1091        # Sort the names into order.
1092        if directories:
1093            directories.sort()
1094        if files:
1095            files.sort()
1096
1097        return directories, files
1098
1099    @staticmethod
1100    def get_file_date_time(file_name):
1101        """Get a local file time and date in UTC."""
1102
1103        file_epoch_time = os.path.getmtime(file_name)
1104        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1105        # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1106        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1107        return d
1108
1109    @staticmethod
1110    def get_file_size(file_name):
1111        """Get file size in bytes."""
1112        return os.path.getsize(file_name)
1113
1114    @staticmethod
1115    def clean_up_temp_file(temp_file_name, file_name, changed):
1116        """Remove the original file, rename the temporary file name to the original name.
1117        If there are no changes, just remove the temporary file.
1118        """
1119
1120        if changed:
1121            # Remove the old file now that we have the rewritten file.
1122            try:
1123                os.remove(file_name)
1124                logging.debug(f"Changes were made.  Removed original file {file_name:s}")
1125            except OSError as detail:
1126                logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}.  Need to remove it manually.")
1127
1128            # Rename the new file to the old file name.
1129            try:
1130                os.rename(temp_file_name, file_name)
1131                logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1132            except OSError as detail:
1133                logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}.  Need to rename manually")
1134        else:
1135            # No changes?  Remove the temporary file.
1136            try:
1137                os.remove(temp_file_name)
1138                logging.debug(f"No changes were made.  Removed temporary file {temp_file_name:s}")
1139            except OSError as detail:
1140                logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}.  Need to remove it manually.")
1141        return
1142
1143    @staticmethod
1144    def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1145        """
1146        Process each line of a file with a list of functions.  Create a new temporary file.
1147
1148        The default list is None which means make an exact copy.
1149        """
1150
1151        # Assume no changes.
1152        changed = False
1153
1154        # Open both input and output files for processing.  Check if we cannot do it.
1155        fin = None
1156        try:
1157            fin = open(in_file_name, "r")
1158        except IOError as detail:
1159            logging.error(f"process_lines_of_file():  \tCannot open file {in_file_name:s} for reading:  {str(detail):s} Aborting...")
1160            if fin is not None:
1161                fin.close()
1162            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1163        fout = None
1164        try:
1165            fout = open(out_file_name, "w")
1166        except IOError as detail:
1167            logging.error(f"process_lines_of_file():  \tCannot open file {out_file_name:s} for writing:  {str(detail):s} Aborting...")
1168            if fout is not None:
1169                fout.close()
1170            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1171
1172        # Read each line of the file, aborting if there is a read error.
1173        try:
1174            line = fin.readline()
1175
1176            # Rewrite the next line of the file using all the rewrite functions.
1177            while line:
1178                original_line = line
1179                # If we have one or more rewrite functions...
1180                if process_line_function_list is not None:
1181                    # ...apply each rewrite functions to the line, one after the other in order.
1182                    for processLineFunction in process_line_function_list:
1183                        if processLineFunction is not None:
1184                            line = processLineFunction(line)
1185
1186                if original_line != line:
1187                    logging.debug("Rewrote the line" + ":" + f"|{original_line:s}|" + "into" + ":" + f"|{line:s}| for file" + ":" + f"{in_file_name:s}")
1188                    changed = True
1189
1190                fout.write(line)
1191
1192                line = fin.readline()
1193
1194            fin.close()
1195            fout.close()
1196        except IOError as detail:
1197            logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s}  Aborting...")
1198            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1199
1200        if changed:
1201            logging.debug(f"process_lines_of_file():  \tRewrote original file {in_file_name:s}."
1202                          f"Changes are in temporary copy {out_file_name:s}")
1203
1204        # Return True if any lines were changed.
1205        return changed
1206
1207    def clean(self):
1208        """Scan through all directories and files in the local on disk website and clean them up."""
1209
1210        num_source_files_changed = 0
1211        num_source_files_syntax_highlighted = 0
1212
1213        logging.debug("Cleaning up the local web page.")
1214
1215        if self.directories is None or self.files is None:
1216            logging.error("Web site has no directories or files.  Aborting...")
1217            raise UpdateWebException("Internal error for cleaning up the local web site.  See the log file for details.  Aborting... ")
1218
1219        for d in self.directories:
1220
1221            if self.is_temp_dir(d):
1222                # Add the full path prefix from the root.
1223                name = self.append_root_dir(self.get_root_dir(), d)
1224                try:
1225                    logging.debug(f"Removing temp dir {self.root_dir:s} recursively")
1226                    shutil.rmtree(name)
1227                except OSError as detail:
1228                    logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1229
1230        for f in self.files:
1231            # Add the full path prefix from the root.
1232            full_file_name = self.append_root_dir(
1233                self.get_root_dir(), f[self.user_settings.FILE_NAME])
1234
1235            # Remove all temporary files.
1236            if self.is_temp_file(f):
1237                try:
1238                    logging.debug(f"Removing temp file {full_file_name:s}")
1239                    os.remove(full_file_name)
1240                except OSError as detail:
1241                    logging.error(f"Cannot remove temp dir {full_file_name:s}: {str(detail):s}")
1242
1243            # Update source code files.
1244            if self.is_source_or_hypertext_file(f):
1245                changed = self.rewrite_source_file(full_file_name)
1246                if changed:
1247                    num_source_files_changed += 1
1248                    logging.debug(f"Rewrote source code file {self.root_dir:s}")
1249
1250            # Generate a  syntax highlighted code listing.  
1251            # Make it the same time and date as the original code.  Then, only if there are recent changes, we will update the remote server.
1252            if self.is_file_to_syntax_highlight(f):
1253                # syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1254                syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name)
1255                if syntax_highlighted_file_name is not None:
1256                    logging.debug(f"Generated a syntax highlighted source listing file {syntax_highlighted_file_name:s} for the file {full_file_name:s}")
1257                else:
1258                    logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1259                num_source_files_syntax_highlighted += 1
1260
1261        logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1262        logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1263
1264    def is_temp_file(self, file_info):
1265        """Identify a file name as a temporary file"""
1266
1267        file_name = file_info[self.user_settings.FILE_NAME]
1268
1269        # Suffixes and names for temporary files be deleted.
1270        pat = self.user_settings.TEMP_FILE_SUFFIXES
1271        match = pat.search(file_name)
1272        # Remove any files containing twiddles anywhere in the name.
1273        if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1274            return True
1275
1276        return False
1277
1278    def is_temp_dir(self, dir_name):
1279        """Identify a name as a temporary directory."""
1280
1281        p = self.user_settings.TEMP_DIR_SUFFIX
1282        return p.search(dir_name)
1283
1284    def is_source_or_hypertext_file(self, file_info):
1285        """ Check if the file name is a source file or a hypertext file."""
1286
1287        file_name = file_info[self.user_settings.FILE_NAME]
1288        p1 = self.user_settings.SOURCE_FILE_PATTERN
1289        p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1290        if p1.search(file_name) or p2.search(file_name):
1291            return True
1292        else:
1293            return False
1294
1295    def is_file_to_syntax_highlight(self, file_info):
1296        """Check if this file type should have a syntax highlighted source listing."""
1297
1298        # Take apart the file name.
1299        full_file_name = file_info[self.user_settings.FILE_NAME]
1300        file_name = Path(full_file_name).name
1301
1302        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1303        if p.search(file_name):
1304            return True
1305        else:
1306            return False
1307
1308    def rewrite_substring(self, line):
1309        """Rewrite a line containing a pattern of your choice"""
1310
1311        # Start with the original unchanged line.
1312        rewritten_line = line
1313
1314        # Do the replacements in order from first to last.
1315        for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1316            # Get the next pattern match replacement string tuple.
1317            [pat, rep_string] = match_replace_tuple
1318            # Does it match?  Then do string substitution, else leave the line unchanged.
1319            match = pat.search(rewritten_line)
1320            if match:
1321                # Now we have these cases:
1322                #     -No capture variables at all, but just a straightforward pattern match followed by a string substitution.
1323                #     -One or more capture variable names in the pattern (?P<varname> ... ) along with the same corresponding match group names in replacement string \\g<varname> ... 
1324                #      If pat.sub() finds any inconsistency here such as the capture variable names not matching the group names, it will throw an exception.
1325                try:
1326                    sub = pat.sub(rep_string, rewritten_line)
1327                    rewritten_line = sub
1328                except IndexError as detail:
1329                    logging.error(f"ERROR: {str(detail):s}.  Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml.    Did not rewrite the line |{rewritten_line:s}|")
1330 
1331        return rewritten_line
1332
1333    def rewrite_email_address_line(self, line):
1334        """Rewrite lines containing old email addresses."""
1335
1336        # Search for the old email address.
1337        pat = self.user_settings.OLD_EMAIL_ADDRESS
1338        match = pat.search(line)
1339
1340        # Replace the old address with my new email address.
1341        if match:
1342            new_address = self.user_settings.NEW_EMAIL_ADDRESS
1343            sub = pat.sub(new_address, line)
1344            line = sub
1345
1346        return line
1347
1348    def rewrite_version_line(self, line):
1349        """Rewrite lines containing the current version of software."""
1350
1351        # Search for the current version.
1352        pat = self.user_settings.CURRENT_SOFTWARE_VERSION
1353        match = pat.search(line)
1354
1355        # Replace with the new version.
1356        if match:
1357            # Note that since we are using raw strings leading and trailing
1358            # whitespace is ignored.
1359            new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
1360            sub = pat.sub(new_version, line)
1361            line = sub
1362
1363        return line
1364
1365    def rewrite_copyright_line(self, line):
1366        """Rewrite copyright lines if they are out of date."""
1367
1368        # Match the lines,
1369        #     Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1370        #     Copyright &copy; nnnn-mmmm by Sean Erik O'Connor.
1371        # and pull out the old year and save it.
1372        pat = self.user_settings.COPYRIGHT_LINE
1373        match = pat.search(line)
1374
1375        # Found a match.
1376        if match:
1377            old_year = int(match.group('old_year'))
1378
1379            # Replace the old year with the current year.
1380            # We matched and extracted the old copyright symbol into the variable
1381            # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
1382            # We now insert it back by placing the special syntax \g<symbol>
1383            # into the replacement string.
1384            if old_year < WebSite.get_current_year():
1385                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
1386                                str(WebSite.get_current_year())
1387                sub = pat.sub(new_copyright, line)
1388                line = sub
1389        return line
1390
1391    def rewrite_last_update_line(self, line):
1392        """Rewrite the Last Updated line if the year is out of date."""
1393
1394        # Match the last updated line and pull out the year.
1395        #      last updated 01 Jan 25.
1396        p = self.user_settings.LAST_UPDATED_LINE
1397        m = p.search(line)
1398
1399        if m:
1400            last_update_year = int(m.group('year'))
1401
1402            # Convert to four digit years.
1403            if last_update_year > 90:
1404                last_update_year += 1900
1405            else:
1406                last_update_year += 2000
1407
1408            # If the year is old, rewrite to "01 Jan <current year>".
1409            if last_update_year < WebSite.get_current_year():
1410                two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1411                sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1412                line = sub
1413
1414        return line
1415
1416    def rewrite_source_file(self, file_name):
1417        """Rewrite copyright lines, last updated lines, etc."""
1418        changed = False
1419
1420        # Create a new temporary file name for the rewritten file.
1421        temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1422
1423        # Apply changes to all lines of the temporary file.  Apply change functions in
1424        # the sequence listed.
1425        if self.process_lines_of_file(file_name, temp_file_name,
1426                                      [self.rewrite_copyright_line,
1427                                       self.rewrite_last_update_line,
1428                                       self.rewrite_email_address_line,
1429                                       self.rewrite_substring,
1430                                       self.rewrite_version_line]):
1431            logging.debug(f"Changed (rewritten) source file {file_name:s}")
1432            changed = True
1433
1434        # Rename the temporary file to the original file name.  If no changes, just delete the temp file.
1435        self.clean_up_temp_file(temp_file_name, file_name, changed)
1436
1437        return changed
1438
1439    @staticmethod
1440    def create_syntax_highlighted_code_listing(source_file_name, **kwargs):
1441        """Create a syntax highlighted source listing for the file and return its name.  Return None if there is an error.
1442        Keep the same date/time as the original file."""
1443
1444        # kwargs is a dictionary for key, value in kwargs.items():
1445        # for key, value in kwargs.items():
1446        #    if key in kwargs:
1447        #        print( f"kwargs:" )
1448        #        print( f"  key   = |{key}|")
1449        #        print( f"  value = |{value}|" )
1450        dry_run_value = kwargs.get('dry_run') 
1451        dry_run = False
1452        if dry_run_value is not None and dry_run_value is True:
1453            dry_run = True
1454
1455        # Take apart the file name.
1456        file_name_without_extension = Path(source_file_name).stem
1457        file_extension = Path(source_file_name).suffix
1458
1459        # Append *.html to the source code file name.  This will be the syntax highlighted code listing.
1460        syntax_highlighted_file_name = f"{source_file_name}.html"
1461
1462        # In the special case of Jupyter notebooks, use the Jupyter to HTML converter.
1463        if file_extension == ".ipynb":
1464            if dry_run:
1465                logging.debug(f"Dry run only:  don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1466                return None
1467            # Python manual recommends using the run() command instead of Popen().  See https://docs.python.org/3/library/subprocess.html#subprocess.run
1468            try:
1469                shell_command = f"jupyter nbconvert {source_file_name} --to html --output {syntax_highlighted_file_name}"
1470                # Throw an exception if we can't run the process.  
1471                # Capture the standard output and standar error and dump to /dev/null so it doesn't print to the command line when running this script.
1472                # Since the shell command is a single string, use shell=True in the run() command.
1473                subprocess.run([shell_command],shell=True,check=True,stdout=subprocess.DEVNULL,stderr=subprocess.DEVNULL)
1474            except subprocess.CalledProcessError as detail: 
1475                logging.error(f"Cannot convert the Jupyter file {source_file_name:s} to a syntax highlighted file: {str(detail):s}  Aborting...")
1476                return None
1477        # Otherwise, use the Pygments syntax highlighter.
1478        else:
1479            # First choose the language lexer from the file name itself if there's no extension.
1480            # Dotted file names are treated as the entire file name.
1481            match file_name_without_extension:
1482                case "makefile":
1483                    lexer = MakefileLexer()
1484                case ".bash_profile"|".bashrc"|".bash_logout":
1485                    lexer = BashLexer()
1486                case ".vimrc":
1487                    lexer = VimLexer()
1488                case ".gitignore_global" | ".gitignore" | ".gitconfig":
1489                    lexer = OutputLexer() # No formatting.
1490                case _:
1491                    # Choose the language lexer from the file extension.  Web stuff first, then programming languages.
1492                    match file_extension:
1493                        case ".html":
1494                            lexer = HtmlLexer()
1495                        case ".css":
1496                            lexer = CssLexer()
1497                        case ".js":
1498                            lexer = JavascriptLexer()
1499                        case ".sh":
1500                            lexer = BashLexer()
1501                        case ".py":
1502                            lexer = PythonLexer()
1503                        case ".c" | ".h":
1504                            lexer = CLexer()
1505                        case ".hpp" | ".cpp":
1506                            lexer = CppLexer()
1507                        case ".lsp":
1508                            lexer = CommonLispLexer()
1509                        case ".for" | ".FOR" | ".f":
1510                            lexer = FortranFixedLexer()  # Fixed format FORTRAN, not FORTRAN 90.
1511                        case ".txt" | ".dat":            # Generic data file;  no formatting.
1512                            lexer = OutputLexer()
1513                        case ".tex":
1514                            lexer = TexLexer()           # LaTeX, TeX, or related files.
1515                        case ".m":
1516                            lexer = MatlabLexer()
1517                        case ".yaml":
1518                            lexer = YamlLexer()
1519                        case _:
1520                            logging.error(f"Can't find a lexer for file {source_file_name}.  Cannot generate a syntax highlighted source listing.  Aborting...")
1521                            return None
1522
1523            # Read the source code file into a single string.
1524            try:
1525                with open(source_file_name, 'r') as fp:
1526                    source_file_string = fp.read()
1527            except OSError as detail:
1528                logging.error(f"Cannot read the source code file {source_file_name:s} for syntax highlighting: {str(detail):s}  Aborting...")
1529
1530            # Top level Pygments function generates the HTML for the highlighted code.
1531            highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1532
1533            # The style sheet is always the same for all languages.
1534            style_sheet = HtmlFormatter().get_style_defs('.highlight')
1535
1536            # Write out the syntax colored file.
1537            if dry_run:
1538                logging.debug(f"Dry run only:  don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1539                return None
1540            else:
1541                try:
1542                    # Write out the highlighted code listing in HTML with CSS style sheet attached.
1543                    with open(syntax_highlighted_file_name, 'w') as fp:
1544                        fp.write(UserSettings.BASIC_HTML_BEGIN)
1545                        fp.write(style_sheet)
1546                        fp.write(UserSettings.BASIC_HTML_MIDDLE)
1547                        fp.write(highlighted_html_source_file_string)
1548                        fp.write(UserSettings.BASIC_HTML_END)
1549                except OSError as detail:
1550                    logging.error(f"Cannot write the syntax highlighted file {syntax_highlighted_file_name:s}: {str(detail):s}  Aborting...")
1551        # ------- end Pygments syntax highlighter
1552
1553        # Set the syntax highlighted code file to the same modification and access time and date as the source file.
1554        file_stat = os.stat(source_file_name)
1555        os.utime(syntax_highlighted_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1556
1557        # Are the original source and the syntax highlighted code the same data and time?
1558        dates_and_times_source_file_name             = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1559        dates_and_times_syntax_highlighted_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1560        if dates_and_times_source_file_name != dates_and_times_syntax_highlighted_file_name:
1561            logging.error(f"Source code and syntax highlighted source don't have the same times.  source time = {dates_and_times_source_file_name.ctime():s} syntax highlighted time = {dates_and_times_syntax_highlighted_file_name.ctime():s} Aborting...")
1562            return None
1563
1564        logging.debug(f"Generated a syntax highlighted listing {syntax_highlighted_file_name:s} for the source code file {source_file_name:s} with the same time and date = {dates_and_times_source_file_name.ctime():s}")
1565        return syntax_highlighted_file_name
1566
1567# ----------------------------------------------------------------------------
1568#   Subclass which knows about the remote web site.
1569# ----------------------------------------------------------------------------
1570
class RemoteWebSite(WebSite):
    """Walk the remote web directory on a web server down from the root.
       Use FTP commands:
           https://en.wikipedia.org/wiki/List_of_FTP_commands
       Use the Python ftp library:
           https://docs.python.org/3/library/ftplib.html
    """

    def __init__(self, user_settings):
        """Connect to the FTP server, log in, and initialize the base class.
        Raises UpdateWebException when the connection or login fails."""

        # Root directory of FTP server.
        self.root_dir = user_settings.FTP_ROOT_NAME
        logging.debug(f"Set the remote web site ftp root dir = {self.root_dir:s}")

        # Connect to FTP server and log in.
        try:
            # self.ftp.set_debuglevel( 2 )
            # SECURITY FIX:  never echo the password to the console;  show only the server and user names.
            print( f"Trying ftp login to server name = {user_settings.SERVER_NAME} user name = {user_settings.USER_NAME}")
            self.ftp = ftplib.FTP(user_settings.SERVER_NAME)
            self.ftp.login(user_settings.USER_NAME, user_settings.PASSWORD_NAME)
        # Catch all exceptions with the parent class Exception:  all built-in,
        # non-system-exiting exceptions are derived from this class.
        except Exception as detail:
            # Extract the string message from the exception class with str().
            logging.error(f"Remote web site cannot login to ftp server: {str(detail):s}  Aborting...")
            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
        else:
            logging.debug("Remote web site ftp login succeeded.")

        logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")

        # Initialize the superclass.
        WebSite.__init__(self, user_settings)

    def go_to_root_dir(self, root_dir):
        """Change to the root directory on the FTP server and read back its actual path.
        Raises UpdateWebException on any FTP failure."""

        try:
            # Go to the root directory.
            self.ftp.cwd(root_dir)
            logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")

            # Read it back.
            self.root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")

        except Exception as detail:
            logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail

    def get_root_dir(self):
        """Get the root directory name"""

        return self.root_dir

    def finish(self):
        """Quit the remote web site, closing the FTP session politely."""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
        try:
            self.ftp.quit()
        except Exception as detail:
            # Best effort:  a failed QUIT is logged but not fatal.
            logging.error(f"Cannot ftp quit: {str(detail):s}")

    def one_level_down(self, d):
        """List files and directories in a subdirectory using ftp.
        Returns (directories, files):  sorted full-path directory names and
        sorted file_info lists.  Raises UpdateWebException on FTP errors."""

        directories = []
        files = []

        try:
            # ftp listing from current dir.
            logging.debug(f"RemoteWebSite.one_level_down():  \tftp cwd: {d:s}")
            self.ftp.cwd(d)
            dir_list = []

            # Use the nonstandard -a option in LIST to show all the hidden .* files.
            # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
            # Note the second argument requires a callback function.
            self.ftp.retrlines('LIST -a', dir_list.append)

        except Exception as detail:
            logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}:  {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail

        for line in dir_list:
            logging.debug(f"RemoteWebSite.one_level_down():  \tftp LIST: {line:s}")

            # Line should at least have the minimum FTP information.
            if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
                # Parse the FTP LIST and put the pieces into file_info.
                file_info = self.parse_ftp_list(line)
                logging.debug(f"RemoteWebSite.one_level_down():  \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")

                # Skip over the UNIX hidden files for current and parent directories . and ..  Also skip over any NULL file names.
                if file_info[self.user_settings.FILE_NAME] == "" or file_info[self.user_settings.FILE_NAME] == "." or file_info[self.user_settings.FILE_NAME] == "..":
                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
                    pass
                # For a directory, prefix the full path prefix from the root to the directory name and add to the directory list.
                elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
                    dirname = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp dir (full path): {dirname:s}")
                    directories.append(dirname)
                # For a file:  Add the full path prefix from the root to the file name.
                else:
                    file_info[self.user_settings.FILE_NAME] = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp file (full path):\
                        {file_info[self.user_settings.FILE_NAME]:s}")
                    files.append(file_info)
            else:
                logging.error(f"RemoteWebSite.one_level_down():  \tFTP LIST line is too short:  {line:s}")

        directories.sort()
        files.sort()

        return directories, files

    def modtime(self, f):
        """Get the modification time of a file via ftp.  Return 0 if ftp cannot get it.
        On success the return value is the string YYYYMMDDhhmmss from the MDTM reply."""
        modtime = 0

        try:
            response = self.ftp.sendcmd('MDTM ' + f)
            # MDTM returns the last modified time of the file in the format
            # "213 YYYYMMDDhhmmss \r\n <error-response>
            # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59.
            # error-response is 550 for info not available, and 500 or 501 if command cannot
            # be parsed.
            if response[:3] == '213':
                modtime = response[4:]
        except ftplib.error_perm as detail:
            logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Aborting...")
            modtime = 0

        return modtime

    def parse_ftp_list(self, line):
        """Parse the ftp file listing and return file name, datetime and file size.

           An FTP LIST command will give output which looks like this for a file:

               -rw-r--r--    1 1000       free             4084 Jul 18 16:55 sparkCoil.png

           and for a directory:

                drwxr-xr-x    2 1000       free             4096 Jul 18 16:36 ReadingList

           FTP uses UTC for its listings; the conversion to local time is done by the OS.
           We can have problems on New Year's Eve.  For example, the local file date/time is

              Mon Jan  1 06:23:12 2018

           But the remote file date/time from FTP listing doesn't show a year even though we
           know it was written to the server in 2017.

               Mon Dec 31 03:02:00

           So we default the remote file year to current year 2018 and get

               Mon Dec 31 03:02:00 2018

           Now we think that the remote file is newer by 363.860278 days.
        """

        # Find out if we've a directory or a file.
        if line[0] == 'd':
            dir_or_file = FileType.DIRECTORY
        else:
            dir_or_file = FileType.FILE

        pattern = self.user_settings.FTP_LISTING

        # Sensible defaults.
        filesize = 0
        filename = ""
        # Default the time to midnight.
        hour = 0
        minute = 0
        seconds = 0
        # Default the date to Jan 1 of the current year.
        month = 1
        day = 1
        year = WebSite.get_current_year()

        # Extract time and date from the ftp listing.
        match = pattern.search(line)

        if match:
            filesize = int(match.group('bytes'))
            month = self.user_settings.monthToNumber[match.group('mon')]
            day = int(match.group('day'))

            # Remote file listing contains the year.  The FTP listing will omit the hour and minute.
            if match.group('year'):
                year = int(match.group('year'))
                logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
            else:
                # Remote file listing omits the year.  Default the year to the current UTC time year.
                # That may be incorrect (see comments above).
                year = WebSite.get_current_year()
                logging.debug(f"ftp is missing the year;  use the current year = {year}")

            # If the FTP listing has the hour and minute, it will omit the year.
            if match.group('hour') and match.group('min'):
                hour = int(match.group('hour'))
                minute = int(match.group('min'))
                logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")

            filename = match.group('filename')

        # Package up the time and date nicely.
        # Note if we didn't get any matches, we'll default the remote date and
        # time to Jan 1 midnight of the current year.
        d = datetime.datetime(year, month, day, hour, minute, seconds)

        return [filename, dir_or_file, d, filesize]
1787
1788# ----------------------------------------------------------------------------
1789#  Class for synchronizing local and remote web sites.
1790# ----------------------------------------------------------------------------
1791
1792class UpdateWeb(object):
1793    """Given previously scanned local and remote directories, update the remote website."""
1794
1795    def __init__(
1796            self,
1797            user_settings,
1798            local_directory_list,
1799            local_file_info,
1800            remote_directory_list,
1801            remote_file_info):
1802        """Connect to remote site.  Accept previously scanned local and remote files and directories."""
1803
1804        # Initialize from args.
1805        self.user_settings = user_settings
1806        self.local_directory_list = local_directory_list
1807        self.remote_directory_list = remote_directory_list
1808        self.local_file_info = local_file_info
1809        self.remote_file_info = remote_file_info
1810
1811        # Initialize defaults.
1812        self.local_files_list = []
1813        self.remote_files_list = []
1814        self.local_file_to_size = {}
1815        self.local_file_to_date_time = {}
1816        self.remote_file_to_date_time = {}
1817        self.local_only_dirs = []
1818        self.local_only_files = []
1819        self.remote_only_dirs = []
1820        self.remote_only_files = []
1821        self.common_files = []
1822
1823        # Connect to FTP server and log in.
1824        try:
1825            self.ftp = ftplib.FTP(self.user_settings.SERVER_NAME)
1826            self.ftp.login(self.user_settings.USER_NAME, self.user_settings.PASSWORD_NAME)
1827        except Exception as detail:
1828            logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
1829            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1830        else:
1831            logging.debug("ftp login succeeded.")
1832
1833        logging.debug(f"ftp server welcome message:  {self.ftp.getwelcome():s}")
1834
1835        # Local root directory.
1836        self.local_root_dir = self.user_settings.local_root_dir
1837        logging.debug(f"Local root directory: {self.local_root_dir:s}")
1838
1839        # Root directory of FTP server.
1840        self.ftp_root_dir = self.user_settings.FTP_ROOT_NAME
1841        logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")
1842
1843        # Transform KB string to integer bytes.  e.g. "200" => 2048000
1844        self.file_size_limit = int(self.user_settings.FILE_SIZE_LIMIT_NAME) * 1024
1845
1846        try:
1847            # Go to the root directory.
1848            self.ftp.cwd(self.ftp_root_dir)
1849
1850            # Read it back.
1851            self.ftp_root_dir = self.ftp.pwd()
1852            logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
1853        except Exception as detail:
1854            logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")
1855
1856    def append_root_dir(self, root_dir, name):
1857        """Append the root directory to a path"""
1858
1859        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1860        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1861        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
1862            return root_dir + name
1863        else:
1864            return root_dir + "/" + name
1865
1866    def file_info(self):
1867        """Create lists of file names from the file information.  Also create dictionaries which map file names onto
1868        dates, times, and sizes."""
1869
1870        # Extract file names.
1871        self.local_files_list = [
1872            file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
1873        self.remote_files_list = [
1874            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]
1875
1876        # Use a dictionary comprehension to create key/value pairs, 
1877        #     (file name, file date/time)
1878        # which map file names onto date/time.
1879        self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
1880        self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}
1881
1882        # Dictionary comprehension creates a mapping of local file names onto file sizes.
1883        self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}
1884
1885    def update(self):
1886        """Scan through the local website, cleaning it up.
1887        Go to remote website on my servers and synchronize all files."""
1888
1889        self.file_info()
1890
1891        # Which files and directories are different.
1892        self.changes()
1893
1894        # Synchronize with the local web site.
1895        self.synchronize()
1896
1897    def changes(self):
1898        """Find the set of different directories and files on local and remote."""
1899
1900        # Add all directories which are only on local to the dictionary.
1901        dir_to_type = {
1902            d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}
1903
1904        # Scan through all remote directories, adding those only on remote or
1905        # on both.
1906        for d in self.remote_directory_list:
1907            if d in dir_to_type:
1908                dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1909            else:
1910                dir_to_type[d] = FileType.ON_REMOTE_ONLY
1911
1912        # Add all files which are only on local to the dictionary.
1913        file_to_type = {
1914            f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}
1915
1916        # Scan through all remote files, adding those only on remote or on
1917        # both.
1918        for f in self.remote_files_list:
1919            if f in file_to_type:
1920                file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1921            else:
1922                file_to_type[f] = FileType.ON_REMOTE_ONLY
1923
1924        logging.debug("Raw dictionary dump of directories")
1925        for k, v in dir_to_type.items():
1926            logging.debug(f"\t dir:  {str(k):s}  type: {str(v):s}")
1927
1928        logging.debug("Raw dictionary dump of files")
1929        for k, v in file_to_type.items():
1930            logging.debug(f"\t file: {str(k):s}  type: {str(v):s}")
1931
1932        # List of directories only on local.  Keep the ordering.
1933        self.local_only_dirs = [
1934            d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]
1935
1936        # List of directories only on remote.  Keep the ordering.
1937        self.remote_only_dirs = [
1938            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]
1939
1940        # We don't care about common directories, only their changed files, if
1941        # any.
1942
1943        # List of files only on local.  Keep the ordering.
1944        self.local_only_files = [
1945            f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]
1946
1947        # List of files only on remote.  Keep the ordering.
1948        self.remote_only_files = [
1949            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]
1950
1951        # List of common files on both local and remote.  Keep the ordering.
1952        self.common_files = [
1953            f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]
1954
1955        logging.debug("*** Directories only on local ******************************")
1956        for d in self.local_only_dirs:
1957            logging.debug(f"\t {d:s}")
1958
1959        logging.debug("*** Directories only on remote ******************************")
1960        for d in self.remote_only_dirs:
1961            logging.debug(f"\t {d:s}")
1962
1963        logging.debug("*** Files only on local ******************************")
1964        for f in self.local_only_files:
1965            logging.debug(f"\t {f:s}")
1966
1967        logging.debug("*** Files only on remote ******************************")
1968        for f in self.remote_only_files:
1969            logging.debug(f"\t {f:s}")
1970
1971        logging.debug("*** Common files ******************************")
1972        for f in self.common_files:
1973            logging.debug(f"name {f:s}")
1974            logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
1975            logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")
1976
1977    def synchronize(self):
1978        """Synchronize files and subdirectories in the remote directory with the local directory."""
1979
1980        # If we have the same files in local and remote, compare their times
1981        # and dates.
1982        for f in self.common_files:
1983            local_file_time = self.local_file_to_date_time[f]
1984            remote_file_time = self.remote_file_to_date_time[f]
1985
1986            # What's the time difference?
1987            time_delta = remote_file_time - local_file_time
1988            # How much difference, either earlier or later?
1989            seconds_different = abs(time_delta.total_seconds())
1990            minutes_different = seconds_different / 60.0
1991            hours_different = minutes_different / 60.0
1992            days_different = hours_different / 24.0
1993
1994            # Assume no upload initially.
1995            upload_to_host = False
1996
1997            logging.debug(f"Common file:  {f:s}.")
1998
1999            # Remote file time is newer.
2000            # Allow 200 characters
2001            # Mathematics/AbstractAlgebra/PrimitivePolynomials/Project/Build/PrimpolyXCode/Primpoly/Primpoly.xcodeproj/project.xcworkspace/xcuserdata/seanoconnor.xcuserdatad/UserInterfaceState.xcuserstate
2002
2003            if remote_file_time > local_file_time:
2004                # Remote file time is MUCH newer:  suspect time is out of joint on the server, so upload local local file to be safe.
2005                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
2006                    logging.error(f"Remote file {f:s} is MUCH newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes]. Upload the file to be safe.")
2007                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
2008                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
2009
2010                    # Set the local file to the current time.
2011                    full_file_name = self.append_root_dir(
2012                        self.local_root_dir, f)
2013                    if os.path.exists(full_file_name):
2014                        # Change the access and modify times of the file to the current time.
2015                        os.utime(full_file_name, None)
2016                        logging.error(f"Touching local file {full_file_name:s} to make it the current time")
2017
2018                    upload_to_host = True
2019                # Remote file time is newer, but not by much.  Let's just assume a slight time mismatch on the server.  Don't upload.
2020                else:
2021                    logging.warning(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
2022                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
2023                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
2024                    upload_to_host = False
2025
2026            # Local file time is newer.
2027            elif local_file_time > remote_file_time:
2028                # Local file time slightly newer than the remote file.  So we are pretty sure the local file really got changed vs the server file.
2029                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
2030                    logging.warning(f"Local file {f:20s} is SLIGHTLY newer by  {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD} minutes].  Uploading to remote server.")
2031                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
2032                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
2033                    upload_to_host = True
2034                else:
2035                    logging.warning(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
2036                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
2037                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
2038                    upload_to_host = False
2039
2040            # Cancel the upload if the file is too big for the server.
2041            size = self.local_file_to_size[f]
2042            if size >= self.file_size_limit:
2043                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
2044                upload_to_host = False
2045
2046            # Finally do the file upload.
2047            if upload_to_host:
2048                logging.debug(f"Uploading changed file {f:s}")
2049                # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
2050                print(f"Uploading changed file {f:s}...  ", end='', flush=True)
2051                self.upload(f)
2052
2053        # Remote directory is not in local.  Delete it.
2054        for d in self.remote_only_dirs:
2055            logging.debug(f"Deleting remote only directory {d:s}")
2056            print(f"Deleting remote only directory {d:s}...  ", end='', flush=True)
2057            self.rmdir(d)
2058
2059        # Local directory missing on remote.  Create it.
2060        # Due to breadth first order scan, we'll create parent directories
2061        # before child directories.
2062        for d in self.local_only_dirs:
2063            logging.debug(f"Only on local.  Creating new remote dir {d:s}.")
2064            print(f"Creating new remote directory {d:s}...  ", end='', flush=True)
2065            self.mkdir(d)
2066
2067        # Local file missing on remote.  Upload it.
2068        for f in self.local_only_files:
2069            logging.debug(f"Local only file.  Uploading {f:s} to remote.")
2070
2071            #  But cancel the upload if the file is too big for the server.
2072            size = self.local_file_to_size[f]
2073            if size >= self.file_size_limit:
2074                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d};"
2075                              f" too large for server, limit is {self.file_size_limit:d} bytes")
2076            else:
2077                logging.debug(f"Uploading new file {f:s}")
2078                print(f"Uploading new file {f:s}...  ", end='', flush=True)
2079                self.upload(f)
2080
2081        # Remote contains a file not present on the local.  Delete the file.
2082        for f in self.remote_only_files:
2083            logging.debug(f"Remote only file.  Deleting remote file {f:s}.")
2084            print(f"Deleting remote file {f:s}...  ", end='', flush=True)
2085            self.del_remote(f)
2086
2087    def del_remote(self, relative_file_path):
2088        """Delete a file using ftp."""
2089
2090        logging.debug(f"del_remote():  \trelative file path name: {relative_file_path:s}")
2091
2092        # Parse the relative file path into file name and relative directory.
2093        relative_dir, file_name = os.path.split(relative_file_path)
2094        logging.debug(f"del_remote():  \tfile name: {file_name:s}")
2095        logging.debug(f"del_remote():  \trelative dir: {relative_dir:s}")
2096        logging.debug(f"del_remote():  \tremote root dir: {self.ftp_root_dir:s}")
2097
2098        try:
2099            # Add the remote root path and go to the remote directory.
2100            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2101            logging.debug(f"del_remote():  \tftp cd remote dir: {remote_dir:s}")
2102            self.ftp.cwd(remote_dir)
2103        except Exception as detail:
2104            logging.error(f"del_remote():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2105        else:
2106            try:
2107                logging.debug(f"del_remote():  \tftp rm: {file_name:s}")
2108
2109                # Don't remove zero length file names.
2110                if len(file_name) > 0:
2111                    self.ftp.delete(file_name)
2112                else:
2113                    logging.warning(
2114                        "fdel_remote():  skipping ftp delete;  file NAME {file_name:s} had zero length")
2115            except Exception as detail:
2116                logging.error(f"del_remote():  \tCannot ftp rm: {str(detail):s}")
2117
2118    def mkdir(self, relative_dir):
2119        """Create new remote directory using ftp."""
2120
2121        logging.debug(f"mkdir():  \trelative dir path name: {relative_dir:s}")
2122        logging.debug(f"mkdir():  \tremote root dir: {self.ftp_root_dir:s}")
2123
2124        # Parse the relative dir path into prefix dir and suffix dir.
2125        path, d = os.path.split(relative_dir)
2126        logging.debug(f"mkdir():  \tremote prefix dir: {path:s}")
2127        logging.debug(f"mkdir():  \tremote dir:  {d:s}")
2128
2129        try:
2130            # Add the remote root path and go to the remote directory.
2131            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2132            logging.debug(f"mkdir():  \tftp cd remote dir: {remote_dir:s}")
2133            self.ftp.cwd(remote_dir)
2134        except Exception as detail:
2135            logging.error(f"mkdir():  \tCannot ftp chrdir: {str(detail):s}  Skipping...")
2136        else:
2137            try:
2138                logging.debug(f"mkdir():  \tftp mkd: {d:s}")
2139                self.ftp.mkd(d)
2140            except Exception as detail:
2141                logging.error(f"mkdir():  \tCannot ftp mkdir: {str(detail):s}")
2142
2143    def rmdir(self, relative_dir):
2144        """Delete an empty directory using ftp."""
2145
2146        logging.debug(f"rmdir():  \tintermediate dir path name: {relative_dir:s}")
2147        logging.debug(f"rmdir():  \tremote root dir: {self.ftp_root_dir:s}")
2148
2149        # Parse the relative dir path into prefix dir and suffix dir.
2150        path, d = os.path.split(relative_dir)
2151        logging.debug(f"rmdir():  \tremote prefix dir: {path:s}")
2152        logging.debug(f"rmdir():  \tremote dir:  {d:s}")
2153
2154        try:
2155            # Add the remote root path and go to the remote directory.
2156            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2157            logging.debug(f"rmdir():  \tftp cd remote dir: {remote_dir:s}")
2158            self.ftp.cwd(remote_dir)
2159        except Exception as detail:
2160            logging.error(f"rmdir():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2161        else:
2162            try:
2163                logging.debug(f"rmdir():  \tftp rmd: {d:s}")
2164                self.ftp.rmd(d)
2165            except Exception as detail:
2166                logging.error(f"rmdir():  \tCannot ftp rmdir dir {d:s}: {str(detail):s}.  Directory is probably not empty.  Do a manual delete.")
2167
2168    def download(self, relative_file_path):
2169        """Download a binary file using ftp."""
2170
2171        logging.debug(f"download():  \tfile name: {relative_file_path:s}")
2172
2173        # Parse the relative file path into file name and relative directory.
2174        relative_dir, file_name = os.path.split(relative_file_path)
2175        logging.debug(f"download():  \tfile name: {file_name:s}")
2176        logging.debug(f"download():  \trelative dir: {relative_dir:s}")
2177        logging.debug(f"download():  \troot dir: {self.ftp_root_dir:s}")
2178
2179        # Add the remote root path and go to the remote directory.
2180        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2181        logging.debug(f"download():  \tftp cd remote dir: {remote_dir:s}")
2182
2183        try:
2184            self.ftp.cwd(remote_dir)
2185        except Exception as detail:
2186            logging.error(f"download():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2187        else:
2188            # Add the local root path to get the local file name.
2189            # Open local binary file to write into.
2190            local_file_name = self.append_root_dir(
2191                self.local_root_dir, relative_file_path)
2192            logging.debug(f"download():  \topen local file name: {local_file_name:s}")
2193            try:
2194                f = open(local_file_name, "wb")
2195                try:
2196                    # Calls f.write() on each block of the binary file.
2197                    # ftp.retrbinary( "RETR " + file_name, f.write )
2198                    pass
2199                except Exception as detail:
2200                    logging.error(f"download():  \tCannot cannot ftp retrbinary: {str(detail):s}")
2201                f.close()
2202            except IOError as detail:
2203                logging.error(f"download():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2204
2205    def upload(self, relative_file_path):
2206        """Upload  a binary file using ftp."""
2207
2208        logging.debug(f"upload():  \trelative file path name: {relative_file_path:s}")
2209
2210        # Parse the relative file path into file name and relative directory.
2211        relative_dir, file_name = os.path.split(relative_file_path)
2212        logging.debug(f"upload():  \tfile name: {file_name:s}")
2213        logging.debug(f"upload():  \trelative dir: {relative_dir:s}")
2214        logging.debug(f"upload():  \tremote root dir: {self.ftp_root_dir:s}")
2215
2216        # Add the remote root path and go to the remote directory.
2217        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2218        logging.debug(f"upload():  \tftp cd remote dir: {remote_dir:s}")
2219
2220        try:
2221            self.ftp.cwd(remote_dir)
2222        except Exception as detail:
2223            logging.error(f"upload():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2224        else:
2225            # Add the local root path to get the local file name.
2226            # Open local binary file to read from.
2227            local_file_name = self.append_root_dir(
2228                self.local_root_dir, relative_file_path)
2229            logging.debug(f"upload():  \topen local file name: {local_file_name:s}")
2230
2231            try:
2232                f = open(local_file_name, "rb")
2233                try:
2234                    # f.read() is called on each block of the binary file until
2235                    # EOF.
2236                    logging.debug(f"upload():  \tftp STOR file {file_name:s}")
2237                    self.ftp.storbinary("STOR " + file_name, f)
2238                except Exception as detail:
2239                    logging.error(f"upload():  \tCannot ftp storbinary: {str(detail):s}")
2240                f.close()
2241            except IOError as detail:
2242                logging.error(f"upload():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2243
2244    def finish(self):
2245        """Log out of an ftp session"""
2246        logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
2247        try:
2248            self.ftp.quit()
2249        except Exception as detail:
2250            logging.error(f"Cannot ftp quit because {str(detail):s}")
2251
2252# ----------------------------------------------------------------------------
2253#  Main function
2254# ----------------------------------------------------------------------------
2255
def main(raw_args=None):
    """Main program.  Clean up and update my website.

    raw_args -- optional list of command line argument strings (e.g.
                ["--test"]), forwarded to CommandLineSettings; None means
                use the real command line."""

    # Print the obligatory legal notice.
    print("""
    updateweb Version 7.2 - A Python utility program which maintains my web site.
    Copyright (C) 2007-2025 by Sean Erik O'Connor.  All Rights Reserved.

    It deletes temporary files, rewrites old copyright lines and email address
    lines in source files, then synchronizes all changes to my web sites.

    updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
    GNU General Public License.  This is free software, and you are welcome
    to redistribute it under certain conditions; see the GNU General Public
    License for details.
    """)

    # Put ALL the main code into a try block!
    try:
        # ---------------------------------------------------------------------
        #  Load default settings and start logging.
        # ---------------------------------------------------------------------

        # Default user settings.
        user_settings = UserSettings()

        print( f"Running main( {raw_args} ) Python version\
               {sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d}\
               local web directory\
               {user_settings.local_root_dir}\n")
        # Get command line options such as --verbose.  Pass them back as flags in
        # user_settings.
        CommandLineSettings(user_settings, raw_args)

        # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
        if user_settings.UNITTEST:
            suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
            unittest.TextTestRunner(verbosity=2).run(suite)
            # We are done!
            print("  ...done!", flush=True)
            return

        # Start logging to file.  Verbose turns on logging for
        # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels,
        # otherwise we log only WARNING, ERROR, and CRITICAL levels.
        if user_settings.VERBOSE:
            loglevel = logging.DEBUG
        else:
            loglevel = logging.WARNING

        # Pick the log file name on the host.  Local cleanup and remote
        # update runs go to separate log files.
        if user_settings.CLEAN:
            user_settings.LOGFILENAME = "/private/logLocal.txt"
        else:
            user_settings.LOGFILENAME = "/private/logRemote.txt"

        # Default is to skip processing or uploading MathJax files in /mathjax to the server.
        if not user_settings.MATHJAX:
            user_settings.DIR_TO_SKIP += "|mathjax"
        else:
            print(f"Processing and uploading mathjax files.  git restore any changed files and git clean -f to remove extra files...  ", end='', flush=True)
            print(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually.  If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box.  This avoids ERROR 421 Too many connections...  ", end='', flush=True)
            logging.debug(f"Processing and uploading mathjax files.  git restore any changed files and git clean -f to remove extra files.")
            logging.debug(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually.  If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box.  This avoids ERROR 421 Too many connections...")

        # Configure logging to file, overwriting (filemode='w') any previous log.
        logging.basicConfig(
            level=loglevel,
            format='%(asctime)s %(levelname)-8s %(message)s',
            datefmt='%a, %d %b %Y %H:%M:%S',
            filename=user_settings.local_root_dir + user_settings.LOGFILENAME,
            filemode='w')

        logging.debug("********** Begin logging") 

        # ---------------------------------------------------------------------
        #  Scan the local website, finding out all files and directories.
        # ---------------------------------------------------------------------

        # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
        print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}...  ", end='', flush=True)
        logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")

        local = LocalWebSite(user_settings)
        local.scan()

        # ---------------------------------------------------------------------
        # Clean up local website.
        # ---------------------------------------------------------------------

        # Clean up the directory by rewriting source code and hypertext and removing temporary files.
        print("Cleaning local web site...  ", end='', flush=True)
        logging.debug("========================== Cleaning the local web site")
        local.clean()

        # We are done with the first scan of the local web site and will dispose of it.
        local.finish()
        del local

        # ---------------------------------------------------------------------
        #  Rescan the local website since there will be changes to source
        #  files from the clean up stage.
        # ---------------------------------------------------------------------

        print(f"Rescan the local web site from root dir = {user_settings.local_root_dir}", end='', flush=True)
        logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")

        local = LocalWebSite(user_settings)

        local.scan()

        # ---------------------------------------------------------------------
        #  List all the local directories and files and their sizes.
        # ---------------------------------------------------------------------

        # Local website directories.
        local_directory_list = local.directories
        logging.debug("********** List of all the Local Directories")
        for d in local_directory_list:
            logging.debug(f"\t {d:s}")

        # Generate lists of the local website filenames only, and their sizes in bytes.
        local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
        total_number_of_files = len( local_files_name_size_pairs )
        logging.debug(f"********** List of all the Local Files from largest to smallest.  There are {total_number_of_files:15d} files.")
        local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)

        # Local website filenames only, and their dates and times.
        local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME],file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
        logging.debug(f"********** List of all Local Files Showing Their Date and Time")
        for file_datetime_pair in local_file_datetime_pairs:
            logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")

        # Total number of bytes in the local files.
        total_number_of_bytes = 0
        for file_size_pair in local_files_name_size_pairs:
            logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
            total_number_of_bytes += file_size_pair[1]
        logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")

        local.finish()

        # In clean-only mode (-c) we stop here:  no remote scan or upload.
        if user_settings.CLEAN:
            logging.debug("========================== Done with local file and directory cleanup...")
            del local
            print("...done!", flush=True)
            return

        # ---------------------------------------------------------------------
        #  Scan the remote hosted web site.
        # ---------------------------------------------------------------------

        print("Scanning remote web site...", end='', flush=True)
        logging.debug("========================== Scanning the remote web site...")

        # Pick which website to update.
        logging.debug("Connecting to primary remote site.")
        remote = RemoteWebSite(user_settings)
        remote.scan()
        remote.finish()

        # ---------------------------------------------------------------------
        #  List all the remote server directories and files and their sizes.
        # ---------------------------------------------------------------------

        remote_directory_list = remote.directories
        logging.debug("********** Remote Directories")
        for d in remote_directory_list:
            logging.debug(f"\t {d:s}")

        # Remote website filenames only, and their sizes in bytes.
        remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
        total_number_of_files = len( remote_files_name_size_list )
        logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
        remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
        total_number_of_bytes = 0
        for file_size in remote_files_name_size_list:
            logging.debug(f"\t {file_size[1]:10d} bytes {file_size[0]:s}")
            total_number_of_bytes += file_size[1]
        logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")

        # ---------------------------------------------------------------------
        # Synchronize the local and remote web sites.
        # ---------------------------------------------------------------------

        print("Synchronizing remote and local web sites...", end='', flush=True)
        logging.debug("========================= Synchronizing remote and local web sites...")

        # Primary website.
        logging.debug("Connecting to primary remote site for synchronization.")
        sync = UpdateWeb(user_settings,
                         local.directories,
                         local.files,
                         remote.directories,
                         remote.files)

        sync.update()
        sync.finish()

        del sync
        del remote
        del local
        print("...done!", flush=True)

    except UpdateWebException as detail:
        logging.error(f"Couldn't update the web directory:  {str(detail):s}.  Aborting...")

    except RecursionError as detail:
        logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d} You can increase it with sys.setrecursionlimit(limit) {str(detail):s}.  Aborting...")
2464
if __name__ == '__main__':
    # Python executes all code in this file, then finally arrives here.
    #
    # * Run as a standalone script, the module is named __main__ and we
    #   invoke main() below.
    #
    # * Imported as a module instead, the caller invokes it explicitly:
    #
    #       import updateweb
    #       updateweb.main(["--test"])
    main()