1#!/usr/bin/env python3
2# ============================================================================
3#
4# NAME
5#
6# updateweb.py
7#
8# DESCRIPTION
9#
10# Python script which updates my web sites.
11#
12# It does miscellaneous cleanup on my local copy of the web site on disk,
13# including updating copyright information, then synchronizes the local
14# copy to my remote server web sites using FTP.
15#
16# USAGE
17#
18# It's best to use the associated makefile.
19# But you can call this Python utility from the command line,
20#
21# $ python updateweb.py Clean up my local copy, then use it
22# to update my remote web server site.
23# Log warnings and errors.
24# $ python updateweb.py -v Same, but log debug messages also.
25# $ python updateweb.py -c Clean up my local copy only.
26# $ python updateweb.py -t Run unit tests only.
27# $ python updateweb.py -m Upload MathJax files (only need to do this once).
28#
29# We get username and password information from the file PARAMETERS_FILE.
30#
31# Logs are written to the files,
32#
33# logLocal.txt Local web site cleanup log.
34# logRemote.txt Remote web server update log.
35#
36# AUTHOR
37#
38# Sean E. O'Connor 23 Aug 2007 Version 1.0 released.
39#
40# LEGAL
41#
42# updateweb.py Version 7.1 - A Python utility program which maintains my web site.
43# Copyright (C) 2007-2024 by Sean Erik O'Connor. All Rights Reserved.
44#
45# This program is free software: you can redistribute it and/or modify
46# it under the terms of the GNU General Public License as published by
47# the Free Software Foundation, either version 3 of the License, or
48# (at your option) any later version.
49#
50# This program is distributed in the hope that it will be useful,
51# but WITHOUT ANY WARRANTY; without even the implied warranty of
52# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
53# GNU General Public License for more details.
54#
55# You should have received a copy of the GNU General Public License
56# along with this program. If not, see <http://www.gnu.org/licenses/>.
57#
58# The author's address is seanerikoconnor!AT!gmail!DOT!com
59# with !DOT! replaced by . and the !AT! replaced by @
60#
61# NOTES
62#
63# DOCUMENTATION
64#
65# Python interpreter: https://www.python.org/
66# Python tutorial and reference: https://docs.python.org/lib/lib.html
67# Python debugger: https://docs.python.org/3/library/pdb.html
68# Python regular expression howto: https://docs.python.org/3.7/howto/regex.html
69#
70# ============================================================================
71
72# ----------------------------------------------------------------------------
73# Load Python Packages
74# ----------------------------------------------------------------------------
75
76# OS stuff
77import sys
78import os
79import argparse
80import shutil
81from pathlib import Path
82
83# Regular expressions
84import re
85
86# FTP stuff
87import ftplib
88
89# Date and time
90import time
91import stat
92import datetime
93
94# Logging
95import logging
96
97# Unit testing
98import unittest
99
100# Enumerated types (v3.4)
101from enum import Enum
102from typing import List, Any
103
104# YAML configuration files (a superset of JSON!)
105import yaml
106# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
107try:
108 from yaml import CLoader as Loader
109except ImportError:
110 from yaml import Loader
111
112# Python syntax highlighter. See https://pygments.org
113from pygments import highlight
114from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
115from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
116from pygments.formatters import HtmlFormatter
117
118
119# ----------------------------------------------------------------------------
120# Custom Top Level Exceptions.
121# ----------------------------------------------------------------------------
122
class UpdateWebException(Exception):
    """Application-level fatal error for this script.

    Raised when something goes wrong at a deep level — searching local files,
    searching remote files, or syncing local and remote — and recovery is not
    possible.  Derived from Exception, as the Python manual recommends for
    user-defined exceptions.
    """
127
128# ----------------------------------------------------------------------------
129# User settings.
130# ----------------------------------------------------------------------------
131
class TreeWalkSettings(Enum):
    """Enum types for how to walk the directory tree."""
    BREADTH_FIRST_SEARCH = 1  # Visit all entries at the current depth before descending.
    DEPTH_FIRST_SEARCH = 2  # Fully explore each subdirectory before moving to siblings.
136
class FileType(Enum):
    """'Enum' types for properties of directories and files."""
    DIRECTORY = 0  # The entry is a directory.
    FILE = 1  # The entry is a regular file.
    ON_LOCAL_ONLY = 2  # Present only in the local website copy.
    ON_REMOTE_ONLY = 3  # Present only on the remote web server.
    ON_BOTH_LOCAL_AND_REMOTE = 4  # Present on both sides (candidate for sync comparison).
144
class UserSettings:
    """Megatons of user selectable settings.

    Holds logging flags, server credentials (loaded from a private YAML file),
    precompiled regular expressions for cleaning web pages, and the boilerplate
    HTML used to wrap syntax-highlighted source listings.
    """
    # Logging control.
    LOGFILENAME = ""
    VERBOSE = False # Verbose mode. Prints out everything.
    CLEAN = False # Clean the local website only.
    UNITTEST = False # Run a unit test of a function.
    MATHJAX = False # Process and upload MathJax files to server.

    # When diving into the MathJax directory, web walking the deep directories
    # may exceed Python's default recursion limit of 1000.
    # NOTE: this runs at class-definition (import) time as a process-wide side effect.
    RECURSION_DEPTH = 5000
    sys.setrecursionlimit(RECURSION_DEPTH)

    # Fields in the file information (file_info) structure.
    # For example, file_info =
    # [ '/WebDesign/EquationImages/equation001.png', -- The file name.
    # 1, -- Enum type: Is it a file? dir? on local? on remote? on both?
    # datetime.datetime(2010, 2, 3, 17, 15), -- UTC encoded in a datetime class.
    # 4675] -- File size in bytes.
    FILE_NAME = 0
    FILE_TYPE = 1
    FILE_DATE_TIME = 2
    FILE_SIZE = 3

    # Server settings. The real values are loaded from the YAML settings file.
    SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
    SERVER_NAME = None
    USER_NAME = None
    PASSWORD_NAME = None
    FTP_ROOT_NAME = None
    FILE_SIZE_LIMIT_NAME = None

    # Map month names onto numbers.
    monthToNumber = {
        'Jan': 1,
        'Feb': 2,
        'Mar': 3,
        'Apr': 4,
        'May': 5,
        'Jun': 6,
        'Jul': 7,
        'Aug': 8,
        'Sep': 9,
        'Oct': 10,
        'Nov': 11,
        'Dec': 12}

    # List of directories to skip over when processing or uploading the web page.
    # Some are private but most are dir of temporary files.
    # They will be listed as WARNING in the log.
    # Examples:
    # My private admin settings directory.
    # Git or SVN local admin directories.
    # Compile build directories from XCode.
    # PyCharm build directories.
    # Python cache directories.
    # Jupyter checkpoint directories.
    # XCode temporary file crap.
    DIR_TO_SKIP = "private|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|__pycache__|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"

    # List of files to skip when processing or uploading to the web page.
    # They will be listed as WARNING in the log.
    # Examples:
    # MathJax yml file.
    # .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
    FILE_TO_SKIP = ".travis.yml|.svnignore|.htaccess"

    # Suffixes for temporary files which will be deleted during the cleanup
    # phase.
    TEMP_FILE_SUFFIXES = r""" # Use Python raw strings.
        \. # Match the dot in the file name.
        # Now begin matching the file name suffix.
        # (?: non-capturing match for the regex inside the parentheses,
        # i.e. matching string cannot be retrieved later.
        # Now match any of the following file extensions:
        (?: o | obj | lib | # Object files generated by C, C++, etc compilers
        pyc | # Object file generated by the Python compiler
        ilk | pdb | sup | # Temp files from VC++ compiler
        idb | ncb | opt | plg | # Temp files from VC++ compiler
        sbr | bsc | map | bce | # Temp files from VC++ compiler
        res | aps | dep | db | # Temp files from VC++ compiler
        jbf | # Paintshop Pro
        class | jar | # Java compiler
        fas | # CLISP compiler
        swp | swo | # Vim editor
        toc | aux | # TeX auxilliary files (not .synctex.gz or .log)
        DS_Store | _\.DS_Store | # macOS finder folder settings.
        _\.Trashes | # macOS recycle bin
        gdb_history) # GDB history
        $ # Now we should see only the end of line.
        """

    # Special case: Vim temporary files contain a twiddle anywhere in the
    # name.
    VIM_TEMP_FILE_EXT = "~"

    # Suffixes for temporary directories which should be deleted during the
    # cleanup phase.
    TEMP_DIR_SUFFIX = r""" # Use Python raw strings.
        (?: Debug | Release | # C++ compiler
        ipch | \.vs | # Temp directories from VC++ compiler
        \.Trashes | \.Trash) # macOS recycle bin
        $
        """

    # File extension for an internally created temporary file.
    TEMP_FILE_EXT = ".new"

    # Identify source file types.
    HYPERTEXT_FILE_PATTERN = r""" # Use Python raw strings.
        (\. # Match the filename suffix after the .
        (?: html | htm | # HTML hypertext
        css) # CSS style sheet
        $) # End of line.
        """

    SOURCE_FILE_PATTERN = r""" # Use Python raw strings.
        (?: makefile$ | # Any file called makefile is a source file.
        # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
        .vimrc$ | # Vim script
        (.bashrc$ | # Bash configuration files.
        .bash_profile$ |
        .bash_logout$)
        |
        (.gitignore$ | # Git configuration files.
        .gitignore_global$ |
        .gitconfig$)
        |
        (\. # Match the filename suffix after the .
        # Now match any of these suffixes:
        (?:
        c | cpp | h | hpp | # C++ and C
        js | # Javascript
        py | # Python
        lsp | # LISP
        m | # MATLAB
        FOR | for | f | # FORTRAN
        yaml | # YAML = JSON superset
        tex | # LaTeX
        txt | dat | # Data files
        sh) # Bash
        $) # End of line.
        )
        """

    # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
    SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
        (?: ^life\.html$ | # We want a listing of this particular HTML file.
        ^index\.html$ | # I want to list my top level HTML file. (There is only one file with this name at the top level web directory.)
        ^webPageDesign\.html$ | # and also this HTML example file, but no others.
        ^StyleSheet\.css$ ) # I want to list my style sheet.
        """

    # Files for which we want to generate a syntax highlighted source code listing.
    # Uses an f-string combined with a raw-string.
    FILE_TO_HIGHLIGHT_PATTERN = fr"""
        (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} |
        {SOURCE_FILE_PATTERN} )
        """

    # Update my email address.
    # This is tricky: Prevent matching and updating the name within in this
    # Python source file by using the character class brackets.
    OLD_EMAIL_ADDRESS = r"""
        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
        """
    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"

    # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.
    # Read patterns and strings from the updateweb.yaml file.
    STRING_REPLACEMENT_LIST = []
    # Pairs of test strings and their correct match/replacements.
    STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []

    # Change all old software version lines of the form
    # Primpoly Version nnnn.nnnn
    # to the new software version.
    # Note that since we are using raw strings leading and trailing whitespace
    # is ignored in both pattern and replacement.
    CURRENT_SOFTWARE_VERSION = r"""
        Primpoly
        \s+
        Version
        \s+
        ([0-9]+) # The two part version number NNN.nnn
        \.
        ([0-9]+)
        """
    NEW_SOFTWARE_VERSION = r"""
        Primpoly Version 16.3
        """

    # Match a copyright line. Then extract the copyright symbol which can be
    # ascii (C) or HTML &copy; and extract the old year.
    TWO_DIGIT_YEAR_FORMAT = "%02d"
    COPYRIGHT_LINE = r"""
        Copyright # Copyright.
        \s+ # One or more spaces.
        (?P<symbol> \(C\) | ©) # Match and extract the copyright symbol.
        \D+ # Any non-digits.
        (?P<old_year>[0-9]+) # Match and extract the old copyright year,
        # then place it into variable 'old_year'
        - # to
        ([0-9]+) # New copyright year.
        """

    # Match a line containing the words,
    # last updated YY
    # and extract the two digit year YY.
    LAST_UPDATED_LINE = r"""
        last\s+ # Match the words "last updated"
        updated\s+
        \d+ # Day number
        \s+ # One or more blanks or tab(
        [A-Za-z]+ # Month
        \s+ # One or more blanks or tabs
        (?P<year>\d+) # Two digit year. Place it into the variable 'year'
        """

    # Web server root directory.
    DEFAULT_ROOT_DIR = "/"

    # The ftp listing occasionally shows a date newer than the actual date.
    # On my server, it could be 6 months newer when we are near New Year's Day. Typically the server file time is only a 1 or 2 minutes newer.
    # But if the remote file time is much newer, it might be an old file with a bad date/time.
    # Upload the file to be safe.
    # How to see the time differences from the log if they are large:
    # egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
    # Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
    # How to see the time differences from the log if they are small and we wait and NOT upload:
    # egrep -o "Remote file.*is newer.*days" logRemote.txt
    # Remote file error404.html is newer by 102.0 seconds = 1.7 minutes = 0.0 hours = 0.0 days
    # Remote file index.html is newer by 113.0 seconds = 1.9 minutes = 0.0 hours = 0.0 days
    MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0
    DAYS_NEWER_FOR_REMOTE_BEFORE_UPLOAD = (1.0 / 24.0) * (1.0 / 60.0) * MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD

    # Upload only if we are newer by more than a few minutes. Allows for a little slop in time stamps on server or host.
    MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0
    DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD = (1.0 / 24.0) * (1.0 / 60.0) * MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD

    # An ftp list command line should be at least this many chars, or we'll
    # suspect and error.
    MIN_FTP_LINE_LENGTH = 7

    # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
    # ftp listings are generally similar to UNIX ls -l listings.
    #
    # Some examples:
    #
    # (1) Freeservers ftp listing,
    #
    # 0 1 2 3 4 5 6 7 8
    # drwxr-xr-x 3 1000 1000 4096 Nov 18 2006 Electronics
    # -rw-r--r-- 1 1000 1000 21984 Jun 4 03:46 StyleSheet.css
    # -rw-r--r-- 1 1000 1000 2901 Sep 26 17:12 allclasses-frame.html
    #
    # (2) atspace ftp listing,
    #
    # drwxr-xr-x 3 seanerikoconnor vusers 49 Apr 7 2006 Electronics
    # -rw-r--r-- 1 seanerikoconnor vusers 21984 Jun 4 04:03 StyleSheet.css
    #
    FTP_LISTING = r"""
        [drwx-]+ # Unix type file mode.
        \s+ # One or more blanks or tabs.
        \d+ # Number of links.
        \s+
        \w+ # Owner.
        \s+
        \w+ # Group.
        \s+
        (?P<bytes> \d+) # File size in bytes, placed into the variable 'bytes'.
        \s+
        (?P<mon> \w+) # Month modified, placed into the variable 'mon'.
        \s+
        (?P<day> \d+) # Day modified, placed into the variable 'day'.
        \s+
        (
        (?P<hour> \d+) # Hour modified, placed into the variable 'hour'.
        :
        (?P<min> \d+) # Minute modified, placed into the variable 'min'.
        |
        (?P<year> \d+) # If hours and minutes are absent (happens when year is not the current year),
        # extract the year instead.
        )
        \s+
        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+) # Path and file name containing letters, numbers,
        # and funny characters. We must escape some of
        # these characters with a backslash, \.
        """

    # HTML header up to the style sheet.
    BASIC_HTML_BEGIN = \
        """
        <!DOCTYPE html>
        <html lang="en-US"> <!-- Set language of this page to USA English. -->

        <head>
        <!-- This page uses Unicode characters. -->
        <meta charset="utf-8">

        <!-- Set viewport to actual device width. Any other settings makes the web page initially appear zoomed-in on mobile devices. -->
        <meta name="viewport" content="width=device-width, initial-scale=1">

        <!-- Title appears in the web browser tab for this page. The browser also uses it to bookmark this page. -->
        <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>

        <!-- Search engines will search using words in this description. They will also display title in their search results. -->
        <meta name="description" content="Syntax Colored Source Code Listing">

        <!-- Some content management software uses the author's name. -->
        <meta name="author" content="Sean Erik O'Connor">

        <meta name="copyright" content="Copyright (C) 1986-2024 by Sean Erik O'Connor. All Rights Reserved.">

        <!-- Begin style sheet insertion -->
        <style>
        /* Default settings for all my main web pages. */
        body
        {
        /* A wide sans-serif font is more readable on the web. */
        font-family: Verdana, Geneva, "Trebuchet MS", sans-serif ;

        /* Set the body font size a little smaller than the user's default browser setting. */
        font-size: 0.8em ;

        /* Black text is easier to read. */
        color: black ;

        /* More vertical space between lines for more pleasant reading. Use a unitless font height multiplier.
        Length and percentage percentage values can give scrunched text due to poor inheritance behavior. */
        line-height: 1.7 ;
        }

        <!-- Now prepare to add the syntax coloring style sheet from Pygment -->
        """

    # After the style sheet and up to the start of the article in the body.
    BASIC_HTML_MIDDLE = \
        """
        </style>
        </head>

        <body>
        <article class="content">
        """

    # After the source code listing, finish the article, body and html document.
    BASIC_HTML_END = \
        """
        </article>
        </body>

        </html>
        """

    def __init__(self):
        """Set up the user settings.

        Determines the local web root for this platform, loads the private
        server settings from the YAML file, and precompiles all regexes.

        Raises:
            UpdateWebException: If the YAML settings file is missing or malformed.
        """

        self.local_root_dir = ""

        # Import the user settings from the parameter file.
        self.get_local_root_dir()
        self.get_server_settings()

        self.precompile_regular_expressions()

    def get_server_settings(self):
        """
        Read web account private settings from a secret offline parameter file.
        These also hold patterns to match and replace in all of our source pages.

        Raises:
            UpdateWebException: If the file cannot be opened or does not contain
                exactly two YAML documents.
        """

        # Private file which contains my account settings.
        settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
        # Recommended by
        # https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
        try:
            # Use a context manager so the file handle is always closed, even on error.
            with open(settings_file_name, "r") as stream:
                # Materialize all YAML documents while the stream is still open:
                # yaml.load_all() returns a lazy generator which would otherwise
                # try to read from a closed file.
                yaml_document_list: list[Any] = list(yaml.load_all(stream, Loader))
        except OSError as detail:
            logging.error(f"Cannot open the YAML file {settings_file_name:s}. Unable to read the settings because: {str(detail):s}")
            # Rethrow the exception higher.
            raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ") from detail
        num_yaml_docs = len(yaml_document_list)
        if num_yaml_docs != 2:
            logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file. Aborting...")
            raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ")

        # Load all the server settings from YAML document #0.
        self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
        self.USER_NAME = yaml_document_list[0]['ftp_user_name']
        self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
        self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
        self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])

        # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
        self.STRING_REPLACEMENT_LIST = []
        pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
        for pat_rep in pat_rep_yaml_list:
            # Fetch the regular expression and compile it for speed.
            verbose_regex = pat_rep['pattern']
            pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
            # Since we use raw strings, we need to strip off leading and trailing whitespace.
            replacement_string = pat_rep['replacement_string'].strip().lstrip()
            self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])

        # Load the test and verify strings.
        # Reset to a fresh instance attribute first; appending to the shared
        # class-level list would accumulate duplicate entries every time a new
        # UserSettings object is constructed (e.g. once per unit test setUp).
        self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []
        test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
        for test_verify_string in test_verify_strings_list:
            test_string = test_verify_string['test_string'].strip().lstrip()
            verify_string = test_verify_string['verify_string'].strip().lstrip()
            self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string,verify_string])

        print(" ...done!", flush=True)
        return

    def get_local_root_dir(self):
        """Get the local website root directory on this platform.

        Leaves self.local_root_dir unchanged on unsupported platforms.
        """

        # Each platform has a definite directory for the web page.
        local_web_dir_path = "/Desktop/Sean/WebSite"

        # macOS, Linux (my Cyberpower PC running Ubuntu) and Cygwin all place the
        # web site at the same home-relative path, so collapse the identical
        # branches into a single startswith() test with a tuple of prefixes.
        if sys.platform.startswith(('darwin', 'linux', 'cygwin')):
            self.local_root_dir = str(Path.home()) + local_web_dir_path
        return

    def precompile_regular_expressions(self):
        """For speed precompile the regular expression search patterns.

        Rebinds the class-level verbose pattern strings as compiled regex
        objects on this instance; case-insensitivity is applied only where
        it matters (e.g. not for file-name patterns).
        """
        self.COPYRIGHT_LINE = re.compile(self.COPYRIGHT_LINE, re.VERBOSE | re.IGNORECASE)
        self.CURRENT_SOFTWARE_VERSION = re.compile(self.CURRENT_SOFTWARE_VERSION, re.VERBOSE | re.IGNORECASE)
        self.FTP_LISTING = re.compile(self.FTP_LISTING, re.VERBOSE | re.IGNORECASE)
        self.TEMP_FILE_SUFFIXES = re.compile(self.TEMP_FILE_SUFFIXES, re.VERBOSE | re.IGNORECASE)
        self.TEMP_DIR_SUFFIX = re.compile(self.TEMP_DIR_SUFFIX, re.VERBOSE)
        self.SOURCE_FILE_PATTERN = re.compile(self.SOURCE_FILE_PATTERN, re.VERBOSE)
        self.HYPERTEXT_FILE_PATTERN = re.compile(self.HYPERTEXT_FILE_PATTERN, re.VERBOSE)
        self.OLD_EMAIL_ADDRESS = re.compile(self.OLD_EMAIL_ADDRESS, re.VERBOSE | re.IGNORECASE)
        self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
        self.LAST_UPDATED_LINE = re.compile(self.LAST_UPDATED_LINE, re.VERBOSE | re.IGNORECASE)
591
592# ----------------------------------------------------------------------------
593# Unit test individual functions.
594# ----------------------------------------------------------------------------
595
class UnitTest(unittest.TestCase):
    """Unit tests for individual helper functions and regex patterns.

    NOTE(review): several tests (file time/date tests) depend on specific
    files and modification times on the author's machine — they are
    environment-specific and will fail elsewhere.
    """
    def setUp(self):
        """Initialize the UnitTest class: load the user settings from disk."""
        self.user_settings = UserSettings()
        self.user_settings.get_local_root_dir()

    def tearDown(self):
        """Clean up the UnitTest class."""
        self.user_settings = None

    def test_copyright_updating(self):
        """Test copyright line updating to the current year."""
        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
        line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor. All Rights Reserved. Copyright © 1999-2024 by Sean Erik O'Connor"
        line_after_update_actual = "Copyright (C) 1999-2024 by Sean Erik O'Connor. All Rights Reserved. Copyright © 1999-2024 by Sean Erik O'Connor"
        pat = self.user_settings.COPYRIGHT_LINE
        match = pat.search(line_before_update)
        # Fail immediately with a clear message if the pattern does not match at all.
        self.assertIsNotNone(match, "COPYRIGHT_LINE pattern did not match the test line.")

        old_year = int(match.group('old_year'))
        # Same as call to self.get_current_year():
        current_year = int(time.gmtime()[0])
        # The test line's old year (1999) must be older than the current year.
        self.assertLess(old_year, current_year)
        # We matched and extracted the old copyright symbol into the variable
        # 'symbol' using the pattern syntax (?P<symbol> \(C\) | ©)
        # We now insert it back by placing the special syntax
        # \g<symbol> into the replacement string.
        new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
            str(current_year)
        line_after_update_computed = pat.sub(new_copyright, line_before_update)
        self.assertEqual(
            line_after_update_actual,
            line_after_update_computed,
            f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")

    def test_update_software_version(self):
        """Test updating to a new version of Primpoly."""
        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
        old_version_line = "| Primpoly Version 00." + "0 - A Program for Computing Primitive Polynomials.|"
        new_version_line = "| Primpoly Version 16." + "3 - A Program for Computing Primitive Polynomials.|"
        pat = self.user_settings.CURRENT_SOFTWARE_VERSION
        match = pat.search(old_version_line)
        self.assertIsNotNone(match, "CURRENT_SOFTWARE_VERSION pattern did not match the test line.")
        # Note that since we are using raw strings leading and trailing
        # whitespace is ignored.
        new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
        updated_version_line = pat.sub(new_version, old_version_line)
        self.assertEqual(updated_version_line, new_version_line, f"updated version line = {updated_version_line:s} new line = {new_version_line:s}")

    def test_extract_filename_from_ftp_listing(self):
        """Test parsing an FTP listing."""
        ftp_line = "-rw-r--r-- 1 1000 1000 2901 Sep 26 17:12 allclasses-frame.html"
        extracted_file_name = "allclasses-frame.html"
        pat = self.user_settings.FTP_LISTING
        match = pat.search(ftp_line)
        self.assertIsNotNone(match, "FTP_LISTING pattern did not match the test line.")
        filename = match.group('filename')
        self.assertEqual(
            filename,
            extracted_file_name,
            f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")

    def test_get_file_time_and_date(self):
        """Test getting a file time and date.

        Environment-specific: relies on the modification time of a file on the
        author's machine.
        """
        # Point to an old file.
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Get the UTC time.
        file_epoch_time = os.path.getmtime(full_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        # Create a datetime object for the file.
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]) # datetime class; year, month, day, hour, minute, seconds.
        # Check if the file time matches what we would see if we did ls -l <file_name>
        computed = f"file {file_name:s} datetime {d.ctime():s}"
        actual = "file ./Images/home.png datetime Thu Jul 18 16:55:44 2024"
        self.assertEqual(computed, actual)

    def test_set_file_time_and_date(self):
        """Test setting a file time and date.

        Environment-specific: relies on the modification time of a file on the
        author's machine.
        """
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Create a temporary file in the same directory.
        temp_file_name = "temporal.tmp"
        full_temp_file_name = self.user_settings.local_root_dir + temp_file_name
        try:
            with open(full_temp_file_name, 'w') as fp:
                fp.write("The End of Eternity")
        except OSError as detail:
            logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s} Aborting...")
            raise UpdateWebException("Failed the unit test for setting time and date of a file. See the log file for details. Aborting...") from detail
        # Get the old file time. Set the temporary file to the same time.
        file_stat = os.stat(full_file_name)
        os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
        # What is the temporary file's time now?
        file_epoch_time = os.path.getmtime(full_temp_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]) # datetime class; year, month, day, hour, minute, seconds.
        # Is the temporary file time set properly?
        computed = f"file {file_name:s} datetime {d.ctime():s}"
        actual = "file ./Images/home.png datetime Thu Jul 18 16:55:44 2024"
        self.assertEqual(computed, actual)
        os.remove(full_temp_file_name)

    def test_difference_of_time_and_date(self):
        """Test a date difference calculation."""
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Get the UTC time.
        file_epoch_time = os.path.getmtime(full_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        # Create a datetime object for the file.
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]) # datetime class; year, month, day, hour, minute, seconds.
        # Slightly change the date and time by adding 1 minute.
        d2 = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]+1) # year, month, day, hour, minute, second
        time_delta = d2 - d
        seconds_different = time_delta.total_seconds()
        minutes_different = seconds_different / 60.0
        hours_different = minutes_different / 60.0
        days_different = hours_different / 24.0
        computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
        actual = "difference 0.00001 days, 0.00028 hours 0.01667 minutes, 1.00000 seconds"
        self.assertEqual(computed, actual)

    def test_pattern_match_dir_to_skip(self):
        """Test if skipping certain named directories is recognizing the dir names."""
        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
        pat = re.compile(self.user_settings.DIR_TO_SKIP)
        # Direct assertion instead of the if/assertTrue(True)/assertTrue(False)
        # anti-pattern; gives a clearer failure message.
        self.assertIsNotNone(pat.search(dir_skip))

    def test_file_name_to_syntax_highlight(self):
        """Test if syntax highlighting recognizes file names to highlight."""
        file_name1 = "Computer/hello.lsp"
        file_name2 = "Computer/life.html"
        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
        # Match against the bare file name, not the full path; assert each
        # file independently so a failure pinpoints which one did not match.
        self.assertIsNotNone(p.search(Path(file_name1).name))
        self.assertIsNotNone(p.search(Path(file_name2).name))

    def test_user_settings(self):
        """Test whether user settings are correctly initialized."""
        computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
        actual = "File size limit = 50000 K"
        self.assertEqual(computed, actual, "File size limit settings are incorrect.")

    def test_check_replace_substring(self,debug=True):
        """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
        For troubleshooting, turn on debug.
        """
        test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
        # Iterate over all test strings.
        for pair in test_verify_pairs:
            [test_string, verify_string] = pair
            if debug:
                print( f">>>>>>> next test string = {test_string}")
                print( f">>>>>>> next verify string = {verify_string}")
            # Iterate over all patterns and replacements.
            for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
                [pat, rep_string] = match_replace_tuple
                # Guard these prints with debug like every other diagnostic print
                # in this test; previously they printed unconditionally.
                if debug:
                    print( f"\t-------> next pattern = {pat}")
                    print( f"\t-------> next replacement = {rep_string}")
                match = pat.search(test_string)
                # The pattern match succeeds.
                if match:
                    try:
                        sub = pat.sub(rep_string, test_string)
                        # String replacement succeeds for this pattern/replace pair iteration.
                        if debug:
                            print( f"\t\t.......> match and replace: {test_string} ---> {sub}")
                        test_string = sub
                    except IndexError as detail:
                        # A bad group reference in the replacement is always a test
                        # failure; previously it was silently ignored unless debug was on.
                        self.fail(f"\t\t.......> Caught an exception: {str(detail):s}. Replacement failed.")
                elif debug:
                    print( f"\t\t.......> no match for pattern = {pat} in test string = {test_string}")
                # No match, so go on to the next pattern and don't change test_string.
            # Done with all pattern/replace on test string.
            # Check this test string in the list.
            self.assertEqual(test_string, verify_string, f"\ntest_string = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
            if debug:
                print( f"\t******* DONE with all pattern matches and replacements on this test/verify string pair.\n" )
789
790# ----------------------------------------------------------------------------
791# Command line options.
792# ----------------------------------------------------------------------------
793
class CommandLineSettings(object):
    """Parse the command line options and record them on the user settings object.

    Each flag, when present, sets the matching boolean on user_settings:
    -v/--verbose -> VERBOSE, -c/--clean -> CLEAN, -t/--test -> UNITTEST,
    -m/--mathjax -> MATHJAX. Flags that are absent leave the corresponding
    attributes untouched.
    """

    def __init__(self, user_settings, raw_args=None):
        """Parse the options from raw_args (None means use sys.argv) and
        transfer the chosen flags onto user_settings."""
        command_line_parser = argparse.ArgumentParser(
            description="updateweb options")

        # Log all changes, not just warnings and errors.
        command_line_parser.add_argument(
            "-v",
            "--verbose",
            help="Turn on verbose mode to log everything",
            action="store_true")

        # Clean up the local website only.
        command_line_parser.add_argument(
            "-c",
            "--clean",
            help="Do a cleanup on the local web site only.",
            action="store_true")

        # ALSO upload the MathJax directory, which is normally skipped.
        # (Comment fixed: it previously duplicated the --clean comment.)
        command_line_parser.add_argument(
            "-m",
            "--mathjax",
            help="""ALSO upload mathjax directory.\
            Do this if you have a new version of MathJax or if you've reset your server and deleted the /mathjax remote directory.\
            You'll need to do git restore on any altered files in the local branch and git clean -f to remove any newly created files.\
            NOTE: If you did reset your server and delete all files, run the command find . -name '*.*' -exec touch {} \\; from the web page root directory.\
            Also run find . -name '*' -exec touch {} \\; This will ensure accurate times on the server.""",
            action="store_true")

        # Run unit tests only.
        command_line_parser.add_argument("-t", "--test",
                                         help="Run unit tests.",
                                         action="store_true")

        args = command_line_parser.parse_args(raw_args)

        if args.verbose:
            user_settings.VERBOSE = True
        if args.clean:
            user_settings.CLEAN = True
        if args.test:
            user_settings.UNITTEST = True
        if args.mathjax:
            user_settings.MATHJAX = True
842
843# ----------------------------------------------------------------------------
844# Base class which describes my web site overall.
845# ----------------------------------------------------------------------------
846
class WebSite(object):
    """
    Abstract class used for analyzing both local and remote (ftp server) websites.
    Contains the web-walking functions which traverse the directory structures and files.
    These will be overloaded in the subclasses with differently specialized methods for either walking a disk drive directory with ls commands or else walking a remote directory with FTP commands.
    Child classes may define additional functions which only they need.

    Subclasses are expected to override get_root_dir(), go_to_root_dir(),
    one_level_down() and modtime(); the versions here are inert stubs.
    """

    def __init__(self, settings):
        """Set up root directories.

        settings is the user-settings object; it is stored as
        self.user_settings and read throughout this class and its subclasses.
        NOTE: this constructor calls the (overridden) get_root_dir() and
        go_to_root_dir(), so any subclass state those methods need must be
        initialized before WebSite.__init__ is invoked.
        """

        # Import the user settings.
        self.user_settings = settings

        # Queue keeps track of directories not yet processed by walk().
        self.queue = []

        # List of all directories traversed (root-stripped names).
        self.directories = []

        # List of files traversed, each with file information
        # [name, type, date/time, size] (see one_level_down in subclasses).
        self.files = []

        # Find out the root directory and go there.
        self.root_dir = self.get_root_dir()
        self.go_to_root_dir(self.root_dir)

    @staticmethod
    def get_current_year() -> int:
        """Get the current year (UTC) as a four digit integer."""
        return int(time.gmtime()[0])

    @staticmethod
    def get_current_two_digit_year() -> int:
        """Get the last two digits of the current year."""
        return WebSite.get_current_year() % 100

    @staticmethod
    def is_file_info_type(file_info) -> bool:
        """Check if we have a file information structure or merely a simple file name.

        Returns True for a file-info list ([name, type, date/time, size]),
        False for a plain string file name. Any other type is a programming
        error and raises UpdateWebException.
        """
        try:
            if isinstance(file_info, list):
                return True
            elif isinstance(file_info, str):
                return False
            else:
                logging.error("is_file_info_type found a bad type. Aborting...")
                raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ")
        except TypeError as detail:
            logging.error(f"is_file_info_type found a bad type {str(detail):s}. Aborting...")
            raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ") from detail

    def get_root_dir(self):
        """Subclass: Put code here to get the root directory"""
        return ""

    def go_to_root_dir(self, root_dir):
        """Subclass: Put code here to go to the root directory"""
        pass  # Python's do-nothing statement.

    def one_level_down(self, d):
        """Subclass: Fill in with a method which returns a list of the
        directories and files immediately beneath dir"""
        return [], []

    def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
        """Walk a directory in either depth first or breadth first order. BFS is the default.

        Side effects: appends root-stripped file info to self.files and
        root-stripped directory names to self.directories. Pending
        directories are kept on self.queue, which is shared with the
        recursive calls made at the bottom of this method.
        """

        # Get all subfiles and subdirectories off this node.
        subdirectories, subfiles = self.one_level_down(d)

        # Add all the subfiles in order.
        for f in subfiles:

            name = self.strip_root(f)
            logging.debug(f"Webwalking: Adding file {name[self.user_settings.FILE_NAME]:s} to list.")

            # Some files are private so skip them from consideration.
            pat = re.compile(self.user_settings.FILE_TO_SKIP)

            if pat.search(name[self.user_settings.FILE_NAME]):
                logging.warning(
                    f"Webwalking: Skipping private file {name[self.user_settings.FILE_NAME]:s}")
            # Don't upload the log file due to file locking problems.
            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
                logging.debug(f"Webwalking: Skipping log file {name[self.user_settings.FILE_NAME]:s}")
            # File size limit on some servers.
            # NOTE(review): no size check is actually performed in this
            # branch; the comment above appears to be a leftover — confirm.
            else:
                self.files.append(name)

        # Queue up the subdirectories.
        for d in subdirectories:
            # Some directories are private such as .git or just temporary file
            # caches so skip them from consideration.
            pat = re.compile(self.user_settings.DIR_TO_SKIP)
            if pat.search(d):
                logging.warning(f"Webwalking: Skipping private dir {d:s}")
            else:
                logging.debug(f"Webwalking: Pushing dir {d:s} on the queue.")
                self.queue.append(d)

        # Search through the directories.
        while len(self.queue) > 0:
            # For breadth first search, remove from beginning of queue.
            if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
                d = self.queue.pop(0)

            # For depth first search, remove from end of queue.
            elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
                d = self.queue.pop()
            # Unknown search type: fall back to BFS behavior.
            else:
                d = self.queue.pop(0)

            name = self.strip_root(d)
            logging.debug(f"Webwalking: Adding relative directory {name:s} to list, full path = {d:s}.")
            self.directories.append(name)

            # Recurse; the recursive call shares self.queue with this loop.
            self.walk(d)

    def strip_root(self, file_info):
        """Return a path, but strip off the root directory.

        Accepts either a plain path string or a file-info list; in the
        latter case only the name field is stripped and the remaining
        fields are passed through unchanged.
        """

        root = self.root_dir

        # Extract the file name.
        if self.is_file_info_type(file_info):
            name = file_info[self.user_settings.FILE_NAME]
        else:
            name = file_info

        # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
        # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
        # Art/foo.txt
        lenroot = len(root)
        if root == self.user_settings.DEFAULT_ROOT_DIR:
            pass
        else:
            # Skip the separator after a non-default root as well.
            lenroot = lenroot + 1

        stripped_path = name[lenroot:]

        if self.is_file_info_type(file_info):
            # Update the file name only.
            return [stripped_path,
                    file_info[self.user_settings.FILE_TYPE],
                    file_info[self.user_settings.FILE_DATE_TIME],
                    file_info[self.user_settings.FILE_SIZE]]
        else:
            return stripped_path

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a path"""

        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def scan(self):
        """Scan the directory tree recursively from the root"""
        logging.debug(f"Webwalking: Beginning recursive directory scan from root directory {self.root_dir:s}")
        self.walk(self.root_dir)

    def modtime(self, f):
        """Subclass: Get file modification time"""
        pass

    def finish(self):
        """Quit web site"""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
        pass
1020
1021# ----------------------------------------------------------------------------
1022# Subclass which knows about the local web site on disk.
1023# ----------------------------------------------------------------------------
1024
1025class LocalWebSite(WebSite):
1026 """Walk the local web directory on local disk down from the root.
1027 Clean up temporary files and do other cleanup work."""
1028
1029 def __init__(self, settings):
1030 """Go to web page root and list all files and directories."""
1031
1032 # Initialize the parent class.
1033 WebSite.__init__(self, settings)
1034
1035 self.root_dir = self.get_root_dir()
1036 logging.debug(f"LocalWebSite.__init__(): \tRoot directory: {self.root_dir:s}")
1037
1038 def get_root_dir(self):
1039 """Get the name of the root directory"""
1040 return self.user_settings.local_root_dir
1041
1042 def go_to_root_dir(self, root_dir):
1043 """Go to the root directory"""
1044
1045 # Go to the root directory.
1046 logging.debug(f"LocalWebSite.go_to_root_dir(): \tchdir to root directory: {root_dir:s}")
1047 os.chdir(root_dir)
1048
1049 # Read it back.
1050 self.root_dir = os.getcwd()
1051 logging.debug(f"LocalWebSite.go_to_root_dir(): \tgetcwd root directory: {self.root_dir:s}")
1052
1053 def one_level_down(self, d):
1054 """List all files and subdirectories in the current directory, dir. For files, collect file info
1055 such as time, date and size."""
1056
1057 directories = []
1058 files = []
1059
1060 # Change to current directory.
1061 os.chdir(d)
1062
1063 # List all subdirectories and files.
1064 dir_list = os.listdir(d)
1065
1066 if dir_list:
1067 for line in dir_list:
1068 # Add the full path prefix from the root.
1069 name = self.append_root_dir(d, line)
1070 logging.debug(f"LocalWebSite.one_level_down(): \tlocal dir or file {name:s}")
1071
1072 # Is it a directory or a file?
1073 if os.path.isdir(name):
1074 directories.append(name)
1075 elif os.path.isfile(name):
1076 # First assemble the file information of name, time/date and size into a list.
1077 # Can index it like an array. For example,
1078 # file_info =
1079 # [ '/WebDesign/EquationImages/equation001.png', -- The file name.
1080 # 1, -- Enum type FileType.FILE = 1.
1081 # datetime.datetime(2010, 2, 3, 17, 15), -- UTC encoded in a date/time class.
1082 # 4675] -- File size in bytes.
1083 file_info = [name,
1084 FileType.FILE,
1085 self.get_file_date_time(name),
1086 self.get_file_size(name)]
1087 files.append(file_info)
1088
1089 # Sort the names into order.
1090 if directories:
1091 directories.sort()
1092 if files:
1093 files.sort()
1094
1095 return directories, files
1096
1097 @staticmethod
1098 def get_file_date_time(file_name):
1099 """Get a local file time and date in UTC."""
1100
1101 file_epoch_time = os.path.getmtime(file_name)
1102 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1103 # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1104 d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1105 return d
1106
1107 @staticmethod
1108 def get_file_size(file_name):
1109 """Get file size in bytes."""
1110 return os.path.getsize(file_name)
1111
1112 @staticmethod
1113 def clean_up_temp_file(temp_file_name, file_name, changed):
1114 """Remove the original file, rename the temporary file name to the original name.
1115 If there are no changes, just remove the temporary file.
1116 """
1117
1118 if changed:
1119 # Remove the old file now that we have the rewritten file.
1120 try:
1121 os.remove(file_name)
1122 logging.debug(f"Changes were made. Removed original file {file_name:s}")
1123 except OSError as detail:
1124 logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}. Need to remove it manually.")
1125
1126 # Rename the new file to the old file name.
1127 try:
1128 os.rename(temp_file_name, file_name)
1129 logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1130 except OSError as detail:
1131 logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}. Need to rename manually")
1132 else:
1133 # No changes? Remove the temporary file.
1134 try:
1135 os.remove(temp_file_name)
1136 logging.debug(f"No changes were made. Removed temporary file {temp_file_name:s}")
1137 except OSError as detail:
1138 logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}. Need to remove it manually.")
1139 return
1140
1141 @staticmethod
1142 def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1143 """
1144 Process each line of a file with a list of functions. Create a new temporary file.
1145
1146 The default list is None which means make an exact copy.
1147 """
1148
1149 # Assume no changes.
1150 changed = False
1151
1152 # Open both input and output files for processing. Check if we cannot do it.
1153 fin = None
1154 try:
1155 fin = open(in_file_name, "r")
1156 except IOError as detail:
1157 logging.error(f"process_lines_of_file(): \tCannot open file {in_file_name:s} for reading: {str(detail):s} Aborting...")
1158 if fin is not None:
1159 fin.close()
1160 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1161 fout = None
1162 try:
1163 fout = open(out_file_name, "w")
1164 except IOError as detail:
1165 logging.error(f"process_lines_of_file(): \tCannot open file {out_file_name:s} for writing: {str(detail):s} Aborting...")
1166 if fout is not None:
1167 fout.close()
1168 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1169
1170 # Read each line of the file, aborting if there is a read error.
1171 try:
1172 line = fin.readline()
1173
1174 # Rewrite the next line of the file using all the rewrite functions.
1175 while line:
1176 original_line = line
1177 # If we have one or more rewrite functions...
1178 if process_line_function_list is not None:
1179 # ...apply each rewrite functions to the line, one after the other in order.
1180 for processLineFunction in process_line_function_list:
1181 if processLineFunction is not None:
1182 line = processLineFunction(line)
1183
1184 if original_line != line:
1185 logging.debug("Rewrote the line" + ":" + f"|{original_line:s}|" + "into" + ":" + f"|{line:s}| for file" + ":" + f"{in_file_name:s}")
1186 changed = True
1187
1188 fout.write(line)
1189
1190 line = fin.readline()
1191
1192 fin.close()
1193 fout.close()
1194 except IOError as detail:
1195 logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s} Aborting...")
1196 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1197
1198 if changed:
1199 logging.debug(f"process_lines_of_file(): \tRewrote original file {in_file_name:s}."
1200 f"Changes are in temporary copy {out_file_name:s}")
1201
1202 # Return True if any lines were changed.
1203 return changed
1204
1205 def clean(self):
1206 """Scan through all directories and files in the local on disk website and clean them up."""
1207
1208 num_source_files_changed = 0
1209 num_source_files_syntax_highlighted = 0
1210
1211 logging.debug("Cleaning up the local web page.")
1212
1213 if self.directories is None or self.files is None:
1214 logging.error("Web site has no directories or files. Aborting...")
1215 raise UpdateWebException("Internal error for cleaning up the local web site. See the log file for details. Aborting... ")
1216
1217 for d in self.directories:
1218
1219 if self.is_temp_dir(d):
1220 # Add the full path prefix from the root.
1221 name = self.append_root_dir(self.get_root_dir(), d)
1222 try:
1223 logging.debug(f"Removing temp dir {self.root_dir:s} recursively")
1224 shutil.rmtree(name)
1225 except OSError as detail:
1226 logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1227
1228 for f in self.files:
1229 # Add the full path prefix from the root.
1230 full_file_name = self.append_root_dir(
1231 self.get_root_dir(), f[self.user_settings.FILE_NAME])
1232
1233 # Remove all temporary files.
1234 if self.is_temp_file(f):
1235 try:
1236 logging.debug(f"Removing temp file {full_file_name:s}")
1237 os.remove(full_file_name)
1238 except OSError as detail:
1239 logging.error(f"Cannot remove temp dir {full_file_name:s}: {str(detail):s}")
1240
1241 # Update source code files.
1242 if self.is_source_or_hypertext_file(f):
1243 changed = self.rewrite_source_file(full_file_name)
1244 if changed:
1245 num_source_files_changed += 1
1246 logging.debug(f"Rewrote source code file {self.root_dir:s}")
1247
1248 # Generate a syntax highlighted code listing.
1249 # Make it the same time and date as the original code. Then, only if there are recent changes, we will update the remote server.
1250 if self.is_file_to_syntax_highlight(f):
1251 # full_file_name_highlighted = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1252 full_file_name_highlighted = self.create_syntax_highlighted_code_listing(full_file_name)
1253 if full_file_name_highlighted is not None:
1254 logging.debug(f"Generated a syntax highlighted source listing file {full_file_name_highlighted:s} for the file {full_file_name:s}")
1255 else:
1256 logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1257 num_source_files_syntax_highlighted += 1
1258
1259 logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1260 logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1261
1262 def is_temp_file(self, file_info):
1263 """Identify a file name as a temporary file"""
1264
1265 file_name = file_info[self.user_settings.FILE_NAME]
1266
1267 # Suffixes and names for temporary files be deleted.
1268 pat = self.user_settings.TEMP_FILE_SUFFIXES
1269 match = pat.search(file_name)
1270 # Remove any files containing twiddles anywhere in the name.
1271 if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1272 return True
1273
1274 return False
1275
1276 def is_temp_dir(self, dir_name):
1277 """Identify a name as a temporary directory."""
1278
1279 p = self.user_settings.TEMP_DIR_SUFFIX
1280 return p.search(dir_name)
1281
1282 def is_source_or_hypertext_file(self, file_info):
1283 """ Check if the file name is a source file or a hypertext file."""
1284
1285 file_name = file_info[self.user_settings.FILE_NAME]
1286 p1 = self.user_settings.SOURCE_FILE_PATTERN
1287 p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1288 if p1.search(file_name) or p2.search(file_name):
1289 return True
1290 else:
1291 return False
1292
1293 def is_file_to_syntax_highlight(self, file_info):
1294 """Check if this file type should have a syntax highlighted source listing."""
1295
1296 # Take apart the file name.
1297 full_file_name = file_info[self.user_settings.FILE_NAME]
1298 file_name = Path(full_file_name).name
1299
1300 p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1301 if p.search(file_name):
1302 return True
1303 else:
1304 return False
1305
1306 def rewrite_substring(self, line):
1307 """Rewrite a line containing a pattern of your choice"""
1308
1309 # Start with the original unchanged line.
1310 rewritten_line = line
1311
1312 # Do the replacements in order from first to last.
1313 for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1314 # Get the next pattern match replacement string tuple.
1315 [pat, rep_string] = match_replace_tuple
1316 # Does it match? Then do string substitution, else leave the line unchanged.
1317 match = pat.search(rewritten_line)
1318 if match:
1319 # Now we have these cases:
1320 # -No capture variables at all, but just a straightforward pattern match followed by a string substitution.
1321 # -One or more capture variable names in the pattern (?P<varname> ... ) along with the same corresponding match group names in replacement string \\g<varname> ...
1322 # If pat.sub() finds any inconsistency here such as the capture variable names not matching the group names, it will throw an exception.
1323 try:
1324 sub = pat.sub(rep_string, rewritten_line)
1325 rewritten_line = sub
1326 except IndexError as detail:
1327 logging.error(f"ERROR: {str(detail):s}. Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml. Did not rewrite the line |{rewritten_line:s}|")
1328
1329 return rewritten_line
1330
1331 def rewrite_email_address_line(self, line):
1332 """Rewrite lines containing old email addresses."""
1333
1334 # Search for the old email address.
1335 pat = self.user_settings.OLD_EMAIL_ADDRESS
1336 match = pat.search(line)
1337
1338 # Replace the old address with my new email address.
1339 if match:
1340 new_address = self.user_settings.NEW_EMAIL_ADDRESS
1341 sub = pat.sub(new_address, line)
1342 line = sub
1343
1344 return line
1345
1346 def rewrite_version_line(self, line):
1347 """Rewrite lines containing the current version of software."""
1348
1349 # Search for the current version.
1350 pat = self.user_settings.CURRENT_SOFTWARE_VERSION
1351 match = pat.search(line)
1352
1353 # Replace with the new version.
1354 if match:
1355 # Note that since we are using raw strings leading and trailing
1356 # whitespace is ignored.
1357 new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
1358 sub = pat.sub(new_version, line)
1359 line = sub
1360
1361 return line
1362
1363 def rewrite_copyright_line(self, line):
1364 """Rewrite copyright lines if they are out of date."""
1365
1366 # Match the lines,
1367 # Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1368 # Copyright © nnnn-mmmm by Sean Erik O'Connor.
1369 # and pull out the old year and save it.
1370 pat = self.user_settings.COPYRIGHT_LINE
1371 match = pat.search(line)
1372
1373 # Found a match.
1374 if match:
1375 old_year = int(match.group('old_year'))
1376
1377 # Replace the old year with the current year.
1378 # We matched and extracted the old copyright symbol into the variable
1379 # 'symbol' using the pattern syntax (?P<symbol> \(C\) | ©)
1380 # We now insert it back by placing the special syntax \g<symbol>
1381 # into the replacement string.
1382 if old_year < WebSite.get_current_year():
1383 new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
1384 str(WebSite.get_current_year())
1385 sub = pat.sub(new_copyright, line)
1386 line = sub
1387 return line
1388
1389 def rewrite_last_update_line(self, line):
1390 """Rewrite the Last Updated line if the year is out of date."""
1391
1392 # Match the last updated line and pull out the year.
1393 # last updated 01 Jan 24.
1394 p = self.user_settings.LAST_UPDATED_LINE
1395 m = p.search(line)
1396
1397 if m:
1398 last_update_year = int(m.group('year'))
1399
1400 # Convert to four digit years.
1401 if last_update_year > 90:
1402 last_update_year += 1900
1403 else:
1404 last_update_year += 2000
1405
1406 # If the year is old, rewrite to "01 Jan <current year>".
1407 if last_update_year < WebSite.get_current_year():
1408 two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1409 sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1410 line = sub
1411
1412 return line
1413
1414 def rewrite_source_file(self, file_name):
1415 """Rewrite copyright lines, last updated lines, etc."""
1416 changed = False
1417
1418 # Create a new temporary file name for the rewritten file.
1419 temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1420
1421 # Apply changes to all lines of the temporary file. Apply change functions in
1422 # the sequence listed.
1423 if self.process_lines_of_file(file_name, temp_file_name,
1424 [self.rewrite_copyright_line,
1425 self.rewrite_last_update_line,
1426 self.rewrite_email_address_line,
1427 self.rewrite_substring,
1428 self.rewrite_version_line]):
1429 logging.debug(f"Changed (rewritten) source file {file_name:s}")
1430 changed = True
1431
1432 # Rename the temporary file to the original file name. If no changes, just delete the temp file.
1433 self.clean_up_temp_file(temp_file_name, file_name, changed)
1434
1435 return changed
1436
1437 @staticmethod
1438 def create_syntax_highlighted_code_listing(full_file_name, **kwargs):
1439 """Create a syntax highlighted source listing for the file and return its name. Return None if there is an error.
1440 Keep the same date/time as the original file."""
1441
1442 # kwargs is a dictionary for key, value in kwargs.items():
1443 # for key, value in kwargs.items():
1444 # if key in kwargs:
1445 # print( f"kwargs:" )
1446 # print( f" key = |{key}|")
1447 # print( f" value = |{value}|" )
1448 dry_run_value = kwargs.get('dry_run')
1449 dry_run = False
1450 if dry_run_value is not None and dry_run_value is True:
1451 dry_run = True
1452
1453 # Take apart the file name.
1454 file_name_without_extension = Path(full_file_name).stem
1455 file_extension = Path(full_file_name).suffix
1456
1457 # Append *.html to the source code file name. This will be the syntax highlighted code listing.
1458 full_file_name_highlighted = f"{full_file_name}.html"
1459
1460 # First choose the language lexer from the file name itself if there's no extension.
1461 # Dotted file names are treated as the entire file name.
1462 match file_name_without_extension:
1463 case "makefile":
1464 lexer = MakefileLexer()
1465 case ".bash_profile"|".bashrc"|".bash_logout":
1466 lexer = BashLexer()
1467 case ".vimrc":
1468 lexer = VimLexer()
1469 case ".gitignore_global" | ".gitignore" | ".gitconfig":
1470 lexer = OutputLexer() # No formatting.
1471 case _:
1472 # Choose the language lexer from the file extension. Web stuff first, then programming languages.
1473 match file_extension:
1474 case ".html":
1475 lexer = HtmlLexer()
1476 case ".css":
1477 lexer = CssLexer()
1478 case ".js":
1479 lexer = JavascriptLexer()
1480 case ".sh":
1481 lexer = BashLexer()
1482 case ".py":
1483 lexer = PythonLexer()
1484 case ".c" | ".h":
1485 lexer = CLexer()
1486 case ".hpp" | ".cpp":
1487 lexer = CppLexer()
1488 case ".lsp":
1489 lexer = CommonLispLexer()
1490 case ".for" | ".FOR" | ".f":
1491 lexer = FortranFixedLexer() # Fixed format FORTRAN, not FORTRAN 90.
1492 case ".txt" | ".dat": # Generic data file; no formatting.
1493 lexer = OutputLexer()
1494 case ".tex":
1495 lexer = TexLexer() # LaTeX, TeX, or related files.
1496 case ".m":
1497 lexer = MatlabLexer()
1498 case ".yaml":
1499 lexer = YamlLexer()
1500 case _:
1501 logging.error(f"Can't find a lexer for file {full_file_name}. Cannot generate a syntax highlighted source listing. Aborting...")
1502 return None
1503
1504 # Read the source code file into a single string.
1505 try:
1506 with open(full_file_name, 'r') as fp:
1507 source_file_string = fp.read()
1508 except OSError as detail:
1509 logging.error(f"Cannot read the source code file {full_file_name:s} for syntax highlighting: {str(detail):s} Aborting...")
1510
1511 # Top level Pygments function generates the HTML for the highlighted code.
1512 highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1513
1514 # The style sheet is always the same for all languages.
1515 style_sheet = HtmlFormatter().get_style_defs('.highlight')
1516
1517 # Write out the syntax colored file.
1518 if dry_run:
1519 logging.debug(f"Dry run only: don't generate the syntax highlighted file {full_file_name_highlighted:s}")
1520 return None
1521 else:
1522 try:
1523 # Write out the highlighted code listing in HTML with CSS style sheet attached.
1524 with open(full_file_name_highlighted, 'w') as fp:
1525 fp.write(UserSettings.BASIC_HTML_BEGIN)
1526 fp.write(style_sheet)
1527 fp.write(UserSettings.BASIC_HTML_MIDDLE)
1528 fp.write(highlighted_html_source_file_string)
1529 fp.write(UserSettings.BASIC_HTML_END)
1530 except OSError as detail:
1531 logging.error(f"Cannot write the syntax highlighted file {full_file_name_highlighted:s}: {str(detail):s} Aborting...")
1532
1533 # Set the listing file to the same modification and access time and date as the source file.
1534 file_stat = os.stat(full_file_name)
1535 os.utime(full_file_name_highlighted, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1536
1537 # What is the listing file time now?
1538 file_epoch_time = os.path.getmtime(full_file_name_highlighted)
1539 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1540 d_list = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]) # datetime class; year, month, day, hour, minute, seconds.
1541
1542 # Source file and listing should be the same time.
1543 file_epoch_time = os.path.getmtime(full_file_name)
1544 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1545 d_source = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]) # datetime class; year, month, day, hour, minute, seconds.
1546 logging.debug(f"Generated a syntax highlighted listing {full_file_name_highlighted:s} with same time as source file {full_file_name:s}.")
1547 logging.debug(f"\tsource file time {d_source.ctime():s}")
1548 logging.debug(f"\tlisting file time {d_list.ctime():s}")
1549 return full_file_name_highlighted
1550
1551# ----------------------------------------------------------------------------
1552# Subclass which knows about the remote web site.
1553# ----------------------------------------------------------------------------
1554
class RemoteWebSite(WebSite):
    """Walk the remote web directory on a web server down from the root.
    Use FTP commands:
        https://en.wikipedia.org/wiki/List_of_FTP_commands
    Use the Python ftp library:
        https://docs.python.org/3/library/ftplib.html
    """

    def __init__(self, settings, server, user, password, ftproot):
        """Connect to the FTP server, log in, and initialize the base class.

        settings:  UserSettings configuration object.
        server:    FTP server host name.
        user:      FTP account user name.
        password:  FTP account password.
        ftproot:   Root directory of the web site on the FTP server.

        Raises UpdateWebException if we cannot connect or log in.
        """

        # Root directory of FTP server.
        self.root_dir = ftproot
        logging.debug(f"Requesting remote web site ftp root dir {self.root_dir:s}")

        # Connect to FTP server and log in.
        try:
            # Uncomment to trace the FTP protocol conversation.
            # self.ftp.set_debuglevel( 2 )
            self.ftp = ftplib.FTP(server)
            self.ftp.login(user, password)
        # Catch all exceptions with the parent class Exception: all built-in,
        # non-system-exiting exceptions are derived from this class.
        except Exception as detail:
            # Extract the string message from the exception class with str().
            logging.error(f"Remote web site cannot login to ftp server: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
        else:
            logging.debug("Remote web site ftp login succeeded.")

        logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")

        # Initialize the superclass.
        super().__init__(settings)

    def go_to_root_dir(self, root_dir):
        """Go to the root directory and read back its canonical name from the server.

        Raises UpdateWebException if the cwd or pwd commands fail.
        """

        try:
            # Go to the root directory.
            self.ftp.cwd(root_dir)
            logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")

            # Read it back from the server; this canonical form may differ
            # from what we requested.
            self.root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")

        except Exception as detail:
            logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail

    def get_root_dir(self):
        """Get the root directory name"""

        return self.root_dir

    def finish(self):
        """Quit the remote web site; a failure to quit is logged but not fatal."""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit: {str(detail):s}")

    def one_level_down(self, d):
        """List files and directories in a subdirectory using ftp.

        d:  Remote directory path to list.

        Returns (directories, files): a sorted list of full directory paths
        and a sorted list of file information lists from parse_ftp_list().

        Raises UpdateWebException if the ftp cwd or LIST commands fail.
        """

        directories = []
        files = []

        try:
            # ftp listing from current dir.
            logging.debug(f"RemoteWebSite.one_level_down(): \tftp cwd: {d:s}")
            self.ftp.cwd(d)
            dir_list = []

            # Use the nonstandard -a option in LIST to show all the hidden .* files.
            # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
            # Note the second argument requires a callback function.
            self.ftp.retrlines('LIST -a', dir_list.append)

        except Exception as detail:
            logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail

        for line in dir_list:
            logging.debug(f"RemoteWebSite.one_level_down(): \tftp LIST: {line:s}")

            # Line should at least have the minimum FTP information.
            if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
                # Parse the FTP LIST and put the pieces into file_info.
                file_info = self.parse_ftp_list(line)
                logging.debug(f"RemoteWebSite.one_level_down(): \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")

                # Skip over the UNIX hidden files for current and parent directories . and .. Also skip over any NULL file names.
                if file_info[self.user_settings.FILE_NAME] in ("", ".", ".."):
                    logging.debug(f"RemoteWebSite.one_level_down(): \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
                # For a directory, prefix the full path prefix from the root to the directory name and add to the directory list.
                elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
                    dirname = self.append_root_dir(d, file_info[self.user_settings.FILE_NAME])
                    logging.debug(f"RemoteWebSite.one_level_down(): \tftp dir (full path): {dirname:s}")
                    directories.append(dirname)
                # For a file: Add the full path prefix from the root to the file name.
                else:
                    file_info[self.user_settings.FILE_NAME] = self.append_root_dir(d, file_info[self.user_settings.FILE_NAME])
                    # NOTE: the original message embedded a long run of spaces here
                    # through a line continuation inside the f-string; normalized.
                    logging.debug(f"RemoteWebSite.one_level_down(): \tftp file (full path): {file_info[self.user_settings.FILE_NAME]:s}")
                    files.append(file_info)
            else:
                logging.error(f"RemoteWebSite.one_level_down(): \tFTP LIST line is too short: {line:s}")

        directories.sort()
        files.sort()

        return directories, files

    def modtime(self, f):
        """Get the modification time of a file via ftp. Return 0 if ftp cannot get it.

        f:  Remote file path.

        Returns the raw MDTM timestamp string YYYYMMDDhhmmss, or 0 on failure.
        """
        modtime = 0

        try:
            response = self.ftp.sendcmd('MDTM ' + f)
            # MDTM returns the last modified time of the file in the format
            # "213 YYYYMMDDhhmmss \r\n <error-response>
            # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59.
            # error-response is 550 for info not available, and 500 or 501 if command cannot
            # be parsed.
            if response[:3] == '213':
                modtime = response[4:]
        # Bug fix: catch every ftp error (ftplib.all_errors), not just error_perm,
        # so transient failures also return the documented default of 0 instead of
        # propagating and aborting the whole synchronization.
        except ftplib.all_errors as detail:
            logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Aborting...")
            modtime = 0

        return modtime

    def parse_ftp_list(self, line):
        """Parse the ftp file listing and return file name, datetime and file size.

        line:  One line of FTP LIST output.

        Returns [filename, dir_or_file, datetime, filesize].

        An FTP LIST command will give output which looks like this for a file:

            -rw-r--r--    1 1000     free         4084 Jul 18 16:55 sparkCoil.png

        and for a directory:

            drwxr-xr-x    2 1000     free         4096 Jul 18 16:36 ReadingList

        FTP uses UTC for its listings; the conversion to local time is done by the OS.
        We can have problems on New Year's Eve. For example, the local file date/time is

            Mon Jan  1 06:23:12 2018

        But the remote file date/time from FTP listing doesn't show a year even though we
        know it was written to the server in 2017.

            Mon Dec 31 03:02:00

        So we default the remote file year to current year 2018 and get

            Mon Dec 31 03:02:00 2018

        Now we think that the remote file is newer by 363.860278 days.
        """

        # Find out if we've a directory or a file: a LIST line starts with 'd'
        # for a directory.
        if line[0] == 'd':
            dir_or_file = FileType.DIRECTORY
        else:
            dir_or_file = FileType.FILE

        pattern = self.user_settings.FTP_LISTING

        # Sensible defaults.
        filesize = 0
        filename = ""
        # Default the time to midnight.
        hour = 0
        minute = 0
        seconds = 0
        # Default the date to Jan 1 of the current year.
        month = 1
        day = 1
        year = WebSite.get_current_year()

        # Extract time and date from the ftp listing.
        match = pattern.search(line)

        if match:
            filesize = int(match.group('bytes'))
            month = self.user_settings.monthToNumber[match.group('mon')]
            day = int(match.group('day'))

            # Remote file listing contains the year. The FTP listing will omit the hour and minute.
            if match.group('year'):
                year = int(match.group('year'))
                logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
            else:
                # Remote file listing omits the year. Default the year to the current UTC time year.
                # That may be incorrect (see comments above).
                year = WebSite.get_current_year()
                logging.debug(f"ftp is missing the year; use the current year = {year}")

            # If the FTP listing has the hour and minute, it will omit the year.
            if match.group('hour') and match.group('min'):
                hour = int(match.group('hour'))
                minute = int(match.group('min'))
                logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")

            filename = match.group('filename')

        # Package up the time and date nicely.
        # Note if we didn't get any matches, we'll default the remote date and
        # time to Jan 1 midnight of the current year.
        d = datetime.datetime(year, month, day, hour, minute, seconds)

        return [filename, dir_or_file, d, filesize]
1770
1771# ----------------------------------------------------------------------------
1772# Class for synchronizing local and remote web sites.
1773# ----------------------------------------------------------------------------
1774
class UpdateWeb(object):
    """Given previously scanned local and remote directories, update the remote website."""

    def __init__(
            self,
            settings,
            server,
            user,
            password,
            ftproot,
            file_size_limit,
            local_directory_list,
            local_file_info,
            remote_directory_list,
            remote_file_info):
        """Connect to remote site. Accept previously scanned local and remote files and directories.

        settings:               UserSettings configuration object.
        server:                 FTP server host name.
        user:                   FTP account user name.
        password:               FTP account password.
        ftproot:                Root directory of the web site on the FTP server.
        file_size_limit:        Maximum upload size in KB.
        local_directory_list:   Directories found by scanning the local site.
        local_file_info:        File information lists for the local site.
        remote_directory_list:  Directories found by scanning the remote site.
        remote_file_info:       File information lists for the remote site.

        Raises UpdateWebException if we cannot log in or reach the ftp root.
        """

        self.user_settings = settings

        # Flat name lists and name -> date/size maps; filled in by file_info().
        self.local_files_list = []
        self.remote_files_list = []
        self.local_file_to_size = {}
        self.local_file_to_date_time = {}
        self.remote_file_to_date_time = {}
        # Local/remote differences; filled in by changes().
        self.local_only_dirs = []
        self.local_only_files = []
        self.remote_only_dirs = []
        self.remote_only_files = []
        self.common_files = []

        # Connect to FTP server and log in.
        try:
            self.ftp = ftplib.FTP(server)
            self.ftp.login(user, password)
        except Exception as detail:
            logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
        else:
            logging.debug("ftp login succeeded.")

        logging.debug(f"ftp server welcome message: {self.ftp.getwelcome():s}")

        # Local root directory.
        self.local_root_dir = self.user_settings.local_root_dir
        logging.debug(f"Local root directory: {self.local_root_dir:s}")

        # Root directory of FTP server.
        self.ftp_root_dir = ftproot
        logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")

        # Transform KB string to integer bytes. e.g. "200" => 204800
        self.file_size_limit = int(file_size_limit) * 1024

        try:
            # Go to the root directory.
            self.ftp.cwd(self.ftp_root_dir)

            # Read it back.
            self.ftp_root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
        except Exception as detail:
            logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")
            # Bug fix: previously this error was only logged and construction
            # continued, which could synchronize against the wrong directory.
            # Abort instead, consistently with the login failure above.
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail

        self.local_directory_list = local_directory_list
        self.remote_directory_list = remote_directory_list
        self.local_file_info = local_file_info
        self.remote_file_info = remote_file_info

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a path"""

        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def file_info(self):
        """Create lists of file names from the file information. Also create dictionaries which map file names onto
        dates, times, and sizes."""

        # Extract file names.
        self.local_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
        self.remote_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]

        # Use a dictionary comprehension to create key/value pairs,
        #   (file name, file date/time)
        # which map file names onto date/time.
        self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
        self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}

        # Dictionary comprehension creates a mapping of local file names onto file sizes.
        self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}

    def update(self):
        """Scan through the local website, cleaning it up.
        Go to remote website on my servers and synchronize all files."""

        self.file_info()

        # Which files and directories are different.
        self.changes()

        # Synchronize with the local web site.
        self.synchronize()

    def changes(self):
        """Find the set of different directories and files on local and remote."""

        # Add all directories which are only on local to the dictionary.
        dir_to_type = {
            d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}

        # Scan through all remote directories, adding those only on remote or
        # on both.
        for d in self.remote_directory_list:
            if d in dir_to_type:
                dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
            else:
                dir_to_type[d] = FileType.ON_REMOTE_ONLY

        # Add all files which are only on local to the dictionary.
        file_to_type = {
            f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}

        # Scan through all remote files, adding those only on remote or on
        # both.
        for f in self.remote_files_list:
            if f in file_to_type:
                file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
            else:
                file_to_type[f] = FileType.ON_REMOTE_ONLY

        logging.debug("Raw dictionary dump of directories")
        for k, v in dir_to_type.items():
            logging.debug(f"\t dir: {str(k):s} type: {str(v):s}")

        logging.debug("Raw dictionary dump of files")
        for k, v in file_to_type.items():
            logging.debug(f"\t file: {str(k):s} type: {str(v):s}")

        # List of directories only on local. Keep the ordering.
        self.local_only_dirs = [
            d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]

        # List of directories only on remote. Keep the ordering.
        self.remote_only_dirs = [
            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]

        # We don't care about common directories, only their changed files, if
        # any.

        # List of files only on local. Keep the ordering.
        self.local_only_files = [
            f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]

        # List of files only on remote. Keep the ordering.
        self.remote_only_files = [
            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]

        # List of common files on both local and remote. Keep the ordering.
        self.common_files = [
            f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]

        logging.debug("*** Directories only on local ******************************")
        for d in self.local_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug("*** Directories only on remote ******************************")
        for d in self.remote_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug("*** Files only on local ******************************")
        for f in self.local_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug("*** Files only on remote ******************************")
        for f in self.remote_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug("*** Common files ******************************")
        for f in self.common_files:
            logging.debug(f"name {f:s}")
            logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
            logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")

    def synchronize(self):
        """Synchronize files and subdirectories in the remote directory with the local directory."""

        # If we have the same files in local and remote, compare their times
        # and dates.
        for f in self.common_files:
            local_file_time = self.local_file_to_date_time[f]
            remote_file_time = self.remote_file_to_date_time[f]

            # What's the time difference?
            time_delta = remote_file_time - local_file_time
            # How much difference, either earlier or later?
            seconds_different = abs(time_delta.total_seconds())
            minutes_different = seconds_different / 60.0
            hours_different = minutes_different / 60.0
            days_different = hours_different / 24.0

            # Assume no upload initially.
            upload_to_host = False

            logging.debug(f"Common file: {f:s}.")

            # Remote file time is newer.
            # Allow 200 characters
            # Mathematics/AbstractAlgebra/PrimitivePolynomials/Project/Build/PrimpolyXCode/Primpoly/Primpoly.xcodeproj/project.xcworkspace/xcuserdata/seanoconnor.xcuserdatad/UserInterfaceState.xcuserstate

            if remote_file_time > local_file_time:
                # Remote file time is MUCH newer: suspect time is out of joint on the server, so upload local local file to be safe.
                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
                    logging.error(f"Remote file {f:s} is MUCH newer[more than {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes] by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days. Upload the file to be safe.")
                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
                    logging.error(f"\tremote time {remote_file_time.ctime():s}")

                    # Set the local file to the current time.
                    full_file_name = self.append_root_dir(
                        self.local_root_dir, f)
                    if os.path.exists(full_file_name):
                        # Change the access and modify times of the file to the current time.
                        os.utime(full_file_name, None)
                        logging.error(f"Touching local file {full_file_name:s} to make it the current time")

                    upload_to_host = True
                # Remote file time is newer, but not by much. Let's just assume a slight time mismatch on the server. Don't upload.
                else:
                    logging.error(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            # Local file time is newer.
            elif local_file_time > remote_file_time:
                # Local file time slightly newer than the remote file. So we are pretty sure the local file really got changed vs the server file.
                if days_different >= self.user_settings.DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
                    logging.warning(f"Local file {f:20s} is SLIGHTLY newer [more than {self.user_settings.DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD} days] by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days. Preparing for upload.")
                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = True
                else:
                    logging.debug(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            # Cancel the upload if the file is too big for the server.
            size = self.local_file_to_size[f]
            if size >= self.file_size_limit:
                logging.error(f"upload(): Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
                upload_to_host = False

            # Finally do the file upload.
            if upload_to_host:
                logging.debug(f"Uploading changed file {f:s}")
                # Suppress newline to keep the message to the console more compact. Flush output buffer, so we can see the message right away.
                print(f"Uploading changed file {f:s}... ", end='', flush=True)
                self.upload(f)

        # Remote directory is not in local. Delete it.
        for d in self.remote_only_dirs:
            logging.debug(f"Deleting remote only directory {d:s}")
            print(f"Deleting remote only directory {d:s}... ", end='', flush=True)
            self.rmdir(d)

        # Local directory missing on remote. Create it.
        # Due to breadth first order scan, we'll create parent directories
        # before child directories.
        for d in self.local_only_dirs:
            logging.debug(f"Only on local. Creating new remote dir {d:s}.")
            print(f"Creating new remote directory {d:s}... ", end='', flush=True)
            self.mkdir(d)

        # Local file missing on remote. Upload it.
        for f in self.local_only_files:
            logging.debug(f"Local only file. Uploading {f:s} to remote.")

            # But cancel the upload if the file is too big for the server.
            size = self.local_file_to_size[f]
            if size >= self.file_size_limit:
                logging.error(f"upload(): Skipping upload of file {f:s} of size {size:d};"
                              f" too large for server, limit is {self.file_size_limit:d} bytes")
            else:
                logging.debug(f"Uploading new file {f:s}")
                print(f"Uploading new file {f:s}... ", end='', flush=True)
                self.upload(f)

        # Remote contains a file not present on the local. Delete the file.
        for f in self.remote_only_files:
            logging.debug(f"Remote only file. Deleting remote file {f:s}.")
            print(f"Deleting remote file {f:s}... ", end='', flush=True)
            self.del_remote(f)

    def del_remote(self, relative_file_path):
        """Delete a file using ftp.

        relative_file_path:  File path relative to the ftp root directory.
        Failures are logged but not raised; deletion is best-effort.
        """

        logging.debug(f"del_remote(): \trelative file path name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"del_remote(): \tfile name: {file_name:s}")
        logging.debug(f"del_remote(): \trelative dir: {relative_dir:s}")
        logging.debug(f"del_remote(): \tremote root dir: {self.ftp_root_dir:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
            logging.debug(f"del_remote(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"del_remote(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"del_remote(): \tftp rm: {file_name:s}")

                # Don't remove zero length file names.
                if len(file_name) > 0:
                    self.ftp.delete(file_name)
                else:
                    # Bug fix: the original message lacked the f prefix so the
                    # file name placeholder was never interpolated, and the
                    # function name had a typo ("fdel_remote").
                    logging.warning(
                        f"del_remote(): skipping ftp delete; file NAME {file_name:s} had zero length")
            except Exception as detail:
                logging.error(f"del_remote(): \tCannot ftp rm: {str(detail):s}")

    def mkdir(self, relative_dir):
        """Create new remote directory using ftp.

        relative_dir:  Directory path relative to the ftp root directory.
        Failures are logged but not raised.
        """

        logging.debug(f"mkdir(): \trelative dir path name: {relative_dir:s}")
        logging.debug(f"mkdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Parse the relative dir path into prefix dir and suffix dir.
        path, d = os.path.split(relative_dir)
        logging.debug(f"mkdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"mkdir(): \tremote dir: {d:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"mkdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"mkdir(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"mkdir(): \tftp mkd: {d:s}")
                self.ftp.mkd(d)
            except Exception as detail:
                logging.error(f"mkdir(): \tCannot ftp mkdir: {str(detail):s}")

    def rmdir(self, relative_dir):
        """Delete an empty directory using ftp.

        relative_dir:  Directory path relative to the ftp root directory.
        Fails (with a log message) if the directory is not empty.
        """

        logging.debug(f"rmdir(): \tintermediate dir path name: {relative_dir:s}")
        logging.debug(f"rmdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Parse the relative dir path into prefix dir and suffix dir.
        path, d = os.path.split(relative_dir)
        logging.debug(f"rmdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"rmdir(): \tremote dir: {d:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"rmdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"rmdir(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"rmdir(): \tftp rmd: {d:s}")
                self.ftp.rmd(d)
            except Exception as detail:
                logging.error(f"rmdir(): \tCannot ftp rmdir dir {d:s}: {str(detail):s}. Directory is probably not empty. Do a manual delete.")

    def download(self, relative_file_path):
        """Download a binary file using ftp.

        relative_file_path:  File path relative to the ftp root directory.
        NOTE: the actual retrbinary transfer is intentionally commented out.
        """

        logging.debug(f"download(): \tfile name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"download(): \tfile name: {file_name:s}")
        logging.debug(f"download(): \trelative dir: {relative_dir:s}")
        logging.debug(f"download(): \troot dir: {self.ftp_root_dir:s}")

        # Add the remote root path and go to the remote directory.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"download(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"download(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Add the local root path to get the local file name.
            # Open local binary file to write into; the with statement
            # guarantees the file is closed even if the transfer fails.
            local_file_name = self.append_root_dir(
                self.local_root_dir, relative_file_path)
            logging.debug(f"download(): \topen local file name: {local_file_name:s}")
            try:
                with open(local_file_name, "wb") as f:
                    try:
                        # Calls f.write() on each block of the binary file.
                        # ftp.retrbinary( "RETR " + file_name, f.write )
                        pass
                    except Exception as detail:
                        logging.error(f"download(): \tCannot ftp retrbinary: {str(detail):s}")
            except IOError as detail:
                # Bug fix: the file is opened for writing, not reading.
                logging.error(f"download(): \tCannot open local file {local_file_name:s} for writing: {str(detail):s}")

    def upload(self, relative_file_path):
        """Upload a binary file using ftp.

        relative_file_path:  File path relative to the ftp root directory.
        Failures are logged but not raised.
        """

        logging.debug(f"upload(): \trelative file path name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"upload(): \tfile name: {file_name:s}")
        logging.debug(f"upload(): \trelative dir: {relative_dir:s}")
        logging.debug(f"upload(): \tremote root dir: {self.ftp_root_dir:s}")

        # Add the remote root path and go to the remote directory.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"upload(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"upload(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Add the local root path to get the local file name.
            # Open local binary file to read from; the with statement
            # guarantees the file is closed even if storbinary raises.
            local_file_name = self.append_root_dir(
                self.local_root_dir, relative_file_path)
            logging.debug(f"upload(): \topen local file name: {local_file_name:s}")

            try:
                with open(local_file_name, "rb") as f:
                    try:
                        # f.read() is called on each block of the binary file until
                        # EOF.
                        logging.debug(f"upload(): \tftp STOR file {file_name:s}")
                        self.ftp.storbinary("STOR " + file_name, f)
                    except Exception as detail:
                        logging.error(f"upload(): \tCannot ftp storbinary: {str(detail):s}")
            except IOError as detail:
                logging.error(f"upload(): \tCannot open local file {local_file_name:s} for reading: {str(detail):s}")

    def finish(self):
        """Log out of the ftp session; a failure to quit is logged but not fatal."""
        logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit because {str(detail):s}")
2238
2239# ----------------------------------------------------------------------------
2240# Main function
2241# ----------------------------------------------------------------------------
2242
2243def main(raw_args=None):
2244 """Main program. Clean up and update my website."""
2245
2246 # Print the obligatory legal notice.
2247 print("""
2248 updateweb Version 7.1 - A Python utility program which maintains my web site.
2249 Copyright (C) 2007-2024 by Sean Erik O'Connor. All Rights Reserved.
2250
2251 It deletes temporary files, rewrites old copyright lines and email address
2252 lines in source files, then synchronizes all changes to my web sites.
2253
2254 updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
2255 GNU General Public License. This is free software, and you are welcome
2256 to redistribute it under certain conditions; see the GNU General Public
2257 License for details.
2258 """)
2259
2260 # Put ALL the main code into a try block!
2261 try:
2262 # ---------------------------------------------------------------------
2263 # Load default settings and start logging.
2264 # ---------------------------------------------------------------------
2265
2266 # Default user settings.
2267 user_settings = UserSettings()
2268
2269 print( f"Running main( {raw_args} ) Python version\
2270 {sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d}\
2271 local web directory\
2272 {user_settings.local_root_dir}\n")
2273 # Get command line options such as --verbose. Pass them back as flags in
2274 # user_settings.
2275 CommandLineSettings(user_settings, raw_args)
2276
2277 # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
2278 if user_settings.UNITTEST:
2279 suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
2280 unittest.TextTestRunner(verbosity=2).run(suite)
2281 # We are done!
2282 print(" ...done!", flush=True)
2283 return
2284
2285 # Start logging to file. Verbose turns on logging for
2286 # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels,
2287 # otherwise we log only WARNING, ERROR, and CRITICAL levels.
2288 if user_settings.VERBOSE:
2289 loglevel = logging.DEBUG
2290 else:
2291 loglevel = logging.WARNING
2292
2293 # Pick the log file name on the host.
2294 if user_settings.CLEAN:
2295 user_settings.LOGFILENAME = "/private/logLocal.txt"
2296 else:
2297 user_settings.LOGFILENAME = "/private/logRemote.txt"
2298
2299 # Default is to skip processing or uploading MathJax files in /mathjax to the server.
2300 if not user_settings.MATHJAX:
2301 user_settings.DIR_TO_SKIP += "|mathjax"
2302 else:
2303 print(f"Processing and uploading mathjax files. git restore any changed files and git clean -f to remove extra files... ", end='', flush=True)
2304 print(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually. If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box. This avoids ERROR 421 Too many connections... ", end='', flush=True)
2305 logging.debug(f"Processing and uploading mathjax files. git restore any changed files and git clean -f to remove extra files.")
2306 logging.debug(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually. If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box. This avoids ERROR 421 Too many connections...")
2307
2308 logging.basicConfig(
2309 level=loglevel,
2310 format='%(asctime)s %(levelname)-8s %(message)s',
2311 datefmt='%a, %d %b %Y %H:%M:%S',
2312 filename=user_settings.local_root_dir + user_settings.LOGFILENAME,
2313 filemode='w')
2314
2315 logging.debug("********** Begin logging")
2316
2317 # ---------------------------------------------------------------------
2318 # Scan the local website, finding out all files and directories.
2319 # ---------------------------------------------------------------------
2320
2321 # Suppress newline to keep the message to the console more compact. Flush output buffer, so we can see the message right away.
2322 print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}... ", end='', flush=True)
2323 logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")
2324
2325 local = LocalWebSite(user_settings)
2326 local.scan()
2327
2328 # ---------------------------------------------------------------------
2329 # Clean up local website.
2330 # ---------------------------------------------------------------------
2331
2332 # Clean up the directory by rewriting source code and hypertext and removing temporary files.
2333 print("Cleaning local web site... ", end='', flush=True)
2334 logging.debug("========================== Cleaning the local web site")
2335 local.clean()
2336
2337 # We are done with the first scan of the local web site and will dispose of it.
2338 local.finish()
2339 del local
2340
2341 # ---------------------------------------------------------------------
2342 # Rescan the local website since there will be changes to source
2343 # files from the clean up stage.
2344 # ---------------------------------------------------------------------
2345
2346 print(f"Rescan the local web site from root dir = {user_settings.local_root_dir}", end='', flush=True)
2347 logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")
2348
2349 local = LocalWebSite(user_settings)
2350
2351 local.scan()
2352
2353 # ---------------------------------------------------------------------
2354 # List all the local directories and files and their sizes.
2355 # ---------------------------------------------------------------------
2356
2357 # Local website directories.
2358 local_directory_list = local.directories
2359 logging.debug("********** List of all the Local Directories")
2360 for d in local_directory_list:
2361 logging.debug(f"\t {d:s}")
2362
2363 # Generate lists of the local website filenames only, and their sizes in bytes.
2364 local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
2365 total_number_of_files = len( local_files_name_size_pairs )
2366 logging.debug(f"********** List of all the Local Files from largest to smallest. There are {total_number_of_files:15d} files.")
2367 local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)
2368
2369 # Local website filenames only, and their dates and times.
2370 local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME],file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
2371 logging.debug(f"********** List of all Local Files Showing Their Date and Time")
2372 for file_datetime_pair in local_file_datetime_pairs:
2373 logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")
2374
2375 # Total number of bytes in the local files.
2376 total_number_of_bytes = 0
2377 for file_size_pair in local_files_name_size_pairs:
2378 logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
2379 total_number_of_bytes += file_size_pair[1]
2380 logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")
2381
2382 local.finish()
2383
2384 if user_settings.CLEAN:
2385 logging.debug("========================== Done with local file and directory cleanup...")
2386 del local
2387 print("...done!", flush=True)
2388 return
2389
2390 # ---------------------------------------------------------------------
2391 # Scan the remote hosted web site.
2392 # ---------------------------------------------------------------------
2393
2394 print("Scanning remote web site...", end='', flush=True)
2395 logging.debug("========================== Scanning the remote web site...")
2396
2397 # Pick which website to update.
2398 logging.debug("Connecting to primary remote site.")
2399 remote = RemoteWebSite(user_settings,
2400 user_settings.SERVER_NAME,
2401 user_settings.USER_NAME,
2402 user_settings.PASSWORD_NAME,
2403 user_settings.FTP_ROOT_NAME)
2404 remote.scan()
2405 remote.finish()
2406
2407 # ---------------------------------------------------------------------
2408 # List all the remote server directories and files and their sizes.
2409 # ---------------------------------------------------------------------
2410
2411 remote_directory_list = remote.directories
2412 logging.debug("********** Remote Directories")
2413 for d in remote_directory_list:
2414 logging.debug(f"\t {d:s}")
2415
2416 # Local website filenames only, and their sizes in bytes.
2417 remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
2418 total_number_of_files = len( remote_files_name_size_list )
2419 logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
2420 remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
2421 total_number_of_bytes = 0
2422 for file_size in remote_files_name_size_list:
2423 logging.debug(f"\t {file_size[1]:10d} bytes {file_size[0]:s}")
2424 total_number_of_bytes += file_size[1]
2425 logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")
2426
2427 # ---------------------------------------------------------------------
2428 # Synchronize the local and remote web sites.
2429 # ---------------------------------------------------------------------
2430
2431 print("Synchronizing remote and local web sites...", end='', flush=True)
2432 logging.debug("========================= Synchronizing remote and local web sites...")
2433
2434 # Primary website.
2435 logging.debug("Connecting to primary remote site for synchronization.")
2436 sync = UpdateWeb(user_settings,
2437 user_settings.SERVER_NAME,
2438 user_settings.USER_NAME,
2439 user_settings.PASSWORD_NAME,
2440 user_settings.FTP_ROOT_NAME,
2441 user_settings.FILE_SIZE_LIMIT_NAME,
2442 local.directories,
2443 local.files,
2444 remote.directories,
2445 remote.files)
2446
2447 sync.update()
2448 sync.finish()
2449
2450 del sync
2451 del remote
2452 del local
2453 print("...done!", flush=True)
2454
2455 except UpdateWebException as detail:
2456 logging.error(f"Couldn't update the web directory: {str(detail):s}. Aborting...")
2457
2458 except RecursionError as detail:
2459 logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d} You can increase it with sys.setrecursionlimit(limit) {str(detail):s}. Aborting...")
2460
if __name__ == '__main__':
    # Script entry point.
    #
    # When this file is executed directly, Python sets the module name to
    # __main__ and we land here, so we hand control to main().
    #
    # When this file is imported as a module instead, this guard is false and
    # nothing runs automatically; a caller invokes it explicitly, e.g.:
    #
    #     import updateweb
    #     updateweb.main(["--test"])
    main()