1#!/usr/bin/env python3
2# ============================================================================
3#
4# NAME
5#
6# updateweb.py
7#
8# DESCRIPTION
9#
10# Python script which updates my web sites.
11#
12# It does miscellaneous cleanup on my local copy of the web site on disk,
13# including updating copyright information, then synchronizes the local
14# copy to my remote server web sites using FTP.
15#
16# USAGE
17#
18# It's best to use the associated makefile.
19# But you can call this Python utility from the command line,
20#
21# $ python updateweb.py Clean up my local copy, then use it
22# to update my remote web server site.
23# Log warnings and errors.
24# $ python updateweb.py -v Same, but log debug messages also.
25# $ python updateweb.py -c Clean up my local copy only.
26# $ python updateweb.py -t Run unit tests only.
27# $ python updateweb.py -m Upload MathJax files (only need to do this once).
28#
29# We get username and password information from the file PARAMETERS_FILE.
30#
31# Logs are written to the files,
32#
33# logLocal.txt Local web site cleanup log.
34# logRemote.txt Remote web server update log.
35#
36# AUTHOR
37#
38# Sean E. O'Connor 23 Aug 2007 Version 1.0 released.
39#
40# LEGAL
41#
42# updateweb.py Version 7.4 - A Python utility program which maintains my web site.
43# Copyright (C) 2007-2025 by Sean Erik O'Connor. All Rights Reserved.
44#
45# This program is free software: you can redistribute it and/or modify
46# it under the terms of the GNU General Public License as published by
47# the Free Software Foundation, either version 3 of the License, or
48# (at your option) any later version.
49#
50# This program is distributed in the hope that it will be useful,
51# but WITHOUT ANY WARRANTY; without even the implied warranty of
52# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
53# GNU General Public License for more details.
54#
55# You should have received a copy of the GNU General Public License
56# along with this program. If not, see <http://www.gnu.org/licenses/>.
57#
58# The author's address is seanerikoconnor!AT!gmail!DOT!com
59# with !DOT! replaced by . and the !AT! replaced by @
60#
61# NOTES
62#
63# DOCUMENTATION
64#
65# Python interpreter: https://www.python.org/
66# Python tutorial and reference: https://docs.python.org/lib/lib.html
67# Python debugger: https://docs.python.org/3/library/pdb.html
68# Python regular expression howto: https://docs.python.org/3.7/howto/regex.html
69#
70# ============================================================================
71
72# ----------------------------------------------------------------------------
73# Load Python Packages
74# ----------------------------------------------------------------------------
75
76# OS stuff
77import sys
78import os
79import argparse
80import subprocess
81import shutil
82from pathlib import Path
83
84# Regular expressions
85import re
86
87# FTP stuff
88import ftplib
89
90# Date and time
91import time
92import stat
93import datetime
94
95# Logging
96import logging
97
98# Unit testing
99import unittest
100
101# Enumerated types (v3.4)
102from enum import Enum
103from typing import List, Any
104
105# YAML configuration files (a superset of JSON!)
106import yaml
107# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
108try:
109 from yaml import CLoader as Loader
110except ImportError:
111 from yaml import Loader
112
113# Python syntax highlighter. See https://pygments.org
114from pygments import highlight
115from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
116from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
117from pygments.formatters import HtmlFormatter
118
119
120# ----------------------------------------------------------------------------
121# Custom Top Level Exceptions.
122# ----------------------------------------------------------------------------
123
124class UpdateWebException(Exception):
125 """Something went wrong at a deep level when searching local files, searching remote files, or trying to sync local and remote, and we could not recover.
126 Derive from Exception as recommended by Python manual"""
127 pass
128
129# ----------------------------------------------------------------------------
130# User settings.
131# ----------------------------------------------------------------------------
132
133class TreeWalkSettings(Enum):
134 """Enum types for how to walk the directory tree."""
135 BREADTH_FIRST_SEARCH = 1
136 DEPTH_FIRST_SEARCH = 2
137
138class FileType(Enum):
139 """'Enum' types for properties of directories and files."""
140 DIRECTORY = 0
141 FILE = 1
142 ON_LOCAL_ONLY = 2
143 ON_REMOTE_ONLY = 3
144 ON_BOTH_LOCAL_AND_REMOTE = 4
145
146class UserSettings:
147 """Megatons of user selectable settings."""
148 # Logging control.
149 LOGFILENAME = ""
150 VERBOSE = False # Verbose mode. Prints out everything.
151 CLEAN = False # Clean the local website only.
152 UNITTEST = False # Run a unit test of a function.
153 MATHJAX = False # Process and upload MathJax files to server.
154
155 # When diving into the MathJax directory, web walking the deep directories
156 # may exceed Python's default recursion limit of 1000.
157 RECURSION_DEPTH = 5000
158 sys.setrecursionlimit(RECURSION_DEPTH)
159
160 # Fields in the file information (file_info) structure.
161 # For example, file_info =
162 # [ '/WebDesign/EquationImages/equation001.png', -- The file name.
163 # 1, -- Enum type: Is it a file? dir? on local? on remote? on both?
164 # datetime.datetime(2010, 2, 3, 17, 15), -- UTC encoded in a datetime class.
165 # 4675] -- File size in bytes.
166 FILE_NAME = 0
167 FILE_TYPE = 1
168 FILE_DATE_TIME = 2
169 FILE_SIZE = 3
170
171 # Server settings.
172 SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
173 SERVER_NAME = None
174 USER_NAME = None
175 PASSWORD_NAME = None
176 FTP_ROOT_NAME = None
177 FILE_SIZE_LIMIT_NAME = None
178
179 # Map month names onto numbers.
180 monthToNumber = {
181 'Jan': 1,
182 'Feb': 2,
183 'Mar': 3,
184 'Apr': 4,
185 'May': 5,
186 'Jun': 6,
187 'Jul': 7,
188 'Aug': 8,
189 'Sep': 9,
190 'Oct': 10,
191 'Nov': 11,
192 'Dec': 12}
193
194 # List of directories to skip over when processing or uploading the web page.
195 # Some are private but most are dir of temporary files.
196 # They will be listed as WARNING in the log.
197 # Examples:
198 # My private admin settings directory.
199 # Git or SVN local admin directories.
200 # Compile build directories fromXCode.
201 # PyCharm build directories.
202 # Python cache directories.
203 # Jupyter checkpoint directories.
204 # XCode temporary file crap.
205 DIR_TO_SKIP = "private|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|__pycache__|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"
206
207 # List of files to skip when processing or uploading to the web page.
208 # They will be listed as WARNING in the log.
209 # Examples:
210 # MathJax yml file.
211 # .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
212 FILE_TO_SKIP = ".travis.yml|.svnignore|.htaccess"
213
214 # Suffixes for temporary files which will be deleted during the cleanup
215 # phase.
216 TEMP_FILE_SUFFIXES = r""" # Use Python raw strings.
217 \. # Match the dot in the file name.
218 # Now begin matching the file name suffix.
219 # (?: non-capturing match for the regex inside the parentheses,
220 # i.e. matching string cannot be retrieved later.
221 # Now match any of the following file extensions:
222 (?: o | obj | lib | # Object files generated by C, C++, etc compilers
223 pyc | # Object file generated by the Python compiler
224 ilk | pdb | sup | # Temp files from VC++ compiler
225 idb | ncb | opt | plg | # Temp files from VC++ compiler
226 sbr | bsc | map | bce | # Temp files from VC++ compiler
227 res | aps | dep | db | # Temp files from VC++ compiler
228 jbf | # Paintshop Pro
229 class | jar | # Java compiler
230 fas | # CLISP compiler
231 swp | swo | # Vim editor
232 toc | aux | # TeX auxilliary files (not .synctex.gz or .log)
233 DS_Store | _\.DS_Store | # macOS finder folder settings.
234 _\.Trashes | # macOS recycle bin
235 gdb_history) # GDB history
236 $ # Now we should see only the end of line.
237 """
238
239 # Special case: Vim temporary files contain a twiddle anywhere in the
240 # name.
241 VIM_TEMP_FILE_EXT = "~"
242
243 # Suffixes for temporary directories which should be deleted during the
244 # cleanup phase.
245 TEMP_DIR_SUFFIX = r""" # Use Python raw strings.
246 (?: Debug | Release | # C++ compiler
247 ipch | \.vs | # Temp directories from VC++ compiler
248 \.Trashes | \.Trash) # macOS recycle bin
249 $
250 """
251
252 # File extension for an internally created temporary file.
253 TEMP_FILE_EXT = ".new"
254
255 # Identify source file types.
256 HYPERTEXT_FILE_PATTERN = r""" # Use Python raw strings.
257 (\. # Match the filename suffix after the .
258 (?: html | htm | # HTML hypertext
259 css) # CSS style sheet
260 $) # End of line.
261 """
262
263 SOURCE_FILE_PATTERN = r""" # Use Python raw strings.
264 (?: makefile$ | # Any file called makefile is a source file.
265 # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
266 .vimrc$ | # Vim script
267 (.bashrc$ | # Bash configuration files.
268 .bash_profile$ |
269 .bash_logout$)
270 |
271 (.gitignore$ | # Git configuration files.
272 .gitignore_global$ |
273 .gitconfig$)
274 |
275 (\. # Match the filename suffix after the .
276 # Now match any of these suffixes:
277 (?:
278 c | cpp | h | hpp | # C++ and C
279 js | # JavaScript
280 py | # Python
281 lsp | # LISP
282 ipynb | # Jupyter notebook
283 m | # MATLAB
284 FOR | for | f | # FORTRAN
285 yaml | # YAML = JSON superset
286 tex | # LaTeX
287 txt | dat | # Data files
288 sh) # Bash
289 $) # End of line.
290 )
291 """
292
293 # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
294 SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
295 (?: ^life\.html$ | # We want a listing of this particular HTML file.
296 ^index\.html$ | # I want to list my top level HTML file. (There is only one file with this name at the top level web directory.)
297 ^webPageDesign\.html$ | # and also this HTML example file, but no others.
298 ^StyleSheet\.css$ ) # I want to list my style sheet.
299 """
300
301 # Files for which we want to generate a syntax highlighted source code listing.
302 # Uses an f-string combined with a raw-string.
303 FILE_TO_HIGHLIGHT_PATTERN = fr"""
304 (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} |
305 {SOURCE_FILE_PATTERN} )
306 """
307
308 # Update my email address.
309 # This is tricky: Prevent matching and updating the name within in this
310 # Python source file by using the character class brackets.
311 OLD_EMAIL_ADDRESS = r"""
312 artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
313 """
314 NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"
315
316 # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.
317 # Read patterns and strings from the updateweb.yaml file.
318 STRING_REPLACEMENT_LIST = []
319 # Pairs of test strings and their correct match/replacements.
320 STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []
321
322 # Match a copyright line like this:
323 # Copyright (C) 1999-2025 by Sean Erik O'Connor. All Rights Reserved.
324 # Extract the copyright symbol which can be ascii (C) or HTML © and extract the old year.
325 TWO_DIGIT_YEAR_FORMAT = "%02d"
326 COPYRIGHT_LINE = r"""
327 Copyright # Copyright.
328 \s+ # One or more spaces.
329 (?P<symbol> \(C\) | ©) # Match and extract the copyright symbol.
330 \D+ # Any non-digits.
331 (?P<old_year>[0-9]+) # Match and extract the old copyright year, place it into variable 'old_year'
332 - # hyphen
333 ([0-9]+) # New copyright year.
334 \s+ # One or more spaces.
335 by\s+Sean\sErik # Start of my name. This way we don't rewrite somebody else's copyright notice.
336 """
337
338 # Match a line containing the words,
339 # last updated YY
340 # and extract the two digit year YY.
341 LAST_UPDATED_LINE = r"""
342 last\s+ # Match the words "last updated"
343 updated\s+
344 \d+ # Day number
345 \s+ # One or more blanks or tab(
346 [A-Za-z]+ # Month
347 \s+ # One or more blanks or tabs
348 (?P<year>\d+) # Two digit year. Place it into the variable 'year'
349 """
350
351 # Web server root directory.
352 DEFAULT_ROOT_DIR = "/"
353
354 # The ftp listing occasionally shows a date newer than the actual date.
355 # On my server, it could be 6 months newer when we are near New Year's Day. Typically the server file time is only a 1 or 2 minutes newer.
356 # But if the remote file time is much newer, it might be an old file with a bad date/time.
357 # Upload the file to be safe.
358 # How to see the time differences from the log if they are large:
359 # egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
360 # Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
361 # How to see the time differences from the log if they are small and we wait and NOT upload:
362 # egrep -o "Remote file.*is newer.*days" logRemote.txt
363 # Remote file error404.html is newer by 102.0 seconds = 1.7 minutes = 0.0 hours = 0.0 days
364 # Remote file index.html is newer by 113.0 seconds = 1.9 minutes = 0.0 hours = 0.0 days
365 MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0
366
367 # Upload only if we are newer by more than a few minutes. Allows for a little slop in time stamps on server or host.
368 MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0
369
370 # An ftp list command line should be at least this many chars, or we'll
371 # suspect and error.
372 MIN_FTP_LINE_LENGTH = 7
373
374 # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
375 # ftp listings are generally similar to UNIX ls -l listings.
376 #
377 # Some examples:
378 #
379 # (1) Freeservers ftp listing,
380 #
381 # 0 1 2 3 4 5 6 7 8
382 # drwxr-xr-x 3 1000 1000 4096 Nov 18 2006 Electronics
383 # -rw-r--r-- 1 1000 1000 21984 Jun 4 03:46 StyleSheet.css
384 # -rw-r--r-- 1 1000 1000 2901 Sep 26 17:12 allclasses-frame.html
385 #
386 # (2) atspace ftp listing,
387 #
388 # drwxr-xr-x 3 seanerikoconnor vusers 49 Apr 7 2006 Electronics
389 # -rw-r--r-- 1 seanerikoconnor vusers 21984 Jun 4 04:03 StyleSheet.css
390 #
391 FTP_LISTING = r"""
392 [drwx-]+ # Unix type file mode.
393 \s+ # One or more blanks or tabs.
394 \d+ # Number of links.
395 \s+
396 \w+ # Owner.
397 \s+
398 \w+ # Group.
399 \s+
400 (?P<bytes> \d+) # File size in bytes, placed into the variable 'bytes'.
401 \s+
402 (?P<mon> \w+) # Month modified, placed into the variable 'mon'.
403 \s+
404 (?P<day> \d+) # Day modified, placed into the variable 'day'.
405 \s+
406 (
407 (?P<hour> \d+) # Hour modified, placed into the variable 'hour'.
408 :
409 (?P<min> \d+) # Minute modified, placed into the variable 'min'.
410 |
411 (?P<year> \d+) # If hours and minutes are absent (happens when year is not the current year),
412 # extract the year instead.
413 )
414 \s+
415 (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+) # Path and file name containing letters, numbers,
416 # and funny characters. We must escape some of
417 # these characters with a backslash, \.
418 """
419
420 # HTML header up to the style sheet.
421 BASIC_HTML_BEGIN = \
422 """
423 <!DOCTYPE html>
424 <html lang="en-US"> <!-- Set language of this page to USA English. -->
425
426 <head>
427 <!-- This page uses Unicode characters. -->
428 <meta charset="utf-8">
429
430 <!-- Set viewport to actual device width. Any other settings makes the web page initially appear zoomed-in on mobile devices. -->
431 <meta name="viewport" content="width=device-width, initial-scale=1">
432
433 <!-- Title appears in the web browser tab for this page. The browser also uses it to bookmark this page. -->
434 <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>
435
436 <!-- Search engines will search using words in this description. They will also display title in their search results. -->
437 <meta name="description" content="Syntax Colored Source Code Listing">
438
439 <!-- Some content management software uses the author's name. -->
440 <meta name="author" content="Sean Erik O'Connor">
441
442 <meta name="copyright" content="Copyright (C) 1986-2025 by Sean Erik O'Connor. All Rights Reserved.">
443
444 <!-- Begin style sheet insertion -->
445 <style>
446 /* Default settings for all my main web pages. */
447 body
448 {
449 /* A wide sans-serif font is more readable on the web. */
450 font-family: Verdana, Geneva, "Trebuchet MS", sans-serif ;
451
452 /* Set the body font size a little smaller than the user's default browser setting. */
453 font-size: 0.8em ;
454
455 /* Black text is easier to read. */
456 color: black ;
457
458 /* More vertical space between lines for more pleasant reading. Use a unitless font height multiplier.
459 Length and percentage percentage values can give scrunched text due to poor inheritance behavior. */
460 line-height: 1.7 ;
461 }
462
463 <!-- Now prepare to add the syntax coloring style sheet from Pygment -->
464 """
465
466 # After the style sheet and up to the start of the article in the body.
467 BASIC_HTML_MIDDLE = \
468 """
469 </style>
470 </head>
471
472 <body>
473 <article class="content">
474 """
475
476 # After the source code listing, finish the article, body and html document.
477 BASIC_HTML_END = \
478 """
479 </article>
480 </body>
481
482 </html>
483 """
484
485 def __init__(self):
486 """Set up the user settings."""
487
488 self.local_root_dir = ""
489
490 # Import the user settings from the parameter file.
491 self.get_local_root_dir()
492 self.get_server_settings()
493
494 self.precompile_regular_expressions()
495
496 def get_server_settings(self):
497 """
498 Read web account private settings from a secret offline parameter file.
499 These also hold patterns to match and replace in all of our source pages.
500 """
501
502 # Private file which contains my account settings.
503 settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
504 # Recommended by
505 # https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
506 try:
507 stream = open(settings_file_name, "r")
508 except OSError as detail:
509 logging.error(f"Cannot open the YAML file {settings_file_name:s}. Unable to read the settings because: {str(detail):s}")
510 # Rethrow the exception higher.
511 raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ") from detail
512 # Read all the YAML documents in the file.
513 yaml_contents = yaml.load_all(stream, Loader)
514 yaml_document_list: list[Any] = []
515 for yaml_doc in yaml_contents:
516 yaml_document_list.append(yaml_doc)
517 num_yaml_docs = len(yaml_document_list)
518 if num_yaml_docs != 2:
519 logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file. Aborting...")
520 raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ")
521
522 # Load all the server settings.
523 self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
524 self.USER_NAME = yaml_document_list[0]['ftp_user_name']
525 self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
526 self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
527 self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])
528
529 # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
530 self.STRING_REPLACEMENT_LIST = []
531 pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
532 for pat_rep in pat_rep_yaml_list:
533 # Fetch the regular expression and compile it for speed.
534 verbose_regex = pat_rep['pattern']
535 pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
536 # Since we use raw strings, we need to strip off leading and trailing whitespace.
537 replacement_string = pat_rep['replacement_string'].strip().lstrip()
538 self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])
539
540 # Load the test and verify strings.
541 test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
542 for test_verify_string in test_verify_strings_list:
543 test_string = test_verify_string['test_string'].strip().lstrip()
544 verify_string = test_verify_string['verify_string'].strip().lstrip()
545 self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string,verify_string])
546
547 print(" ...done!", flush=True)
548 return
549
550 def get_local_root_dir(self):
551 """Get the local website root directory on this platform."""
552
553 # Each platform has a definite directory for the web page.
554 local_web_dir_path = "/Desktop/Sean/WebSite"
555
556 if sys.platform.startswith('darwin'):
557 self.local_root_dir = str(Path.home()) + local_web_dir_path
558 # My Cyperpower PC running Ubuntu Linux.
559 elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
560 self.local_root_dir = str(Path.home()) + local_web_dir_path
561 return
562
563 def precompile_regular_expressions(self):
564 """For speed precompile the regular expression search patterns."""
565 self.COPYRIGHT_LINE = re.compile(self.COPYRIGHT_LINE, re.VERBOSE | re.IGNORECASE)
566 self.FTP_LISTING = re.compile(self.FTP_LISTING, re.VERBOSE | re.IGNORECASE)
567 self.TEMP_FILE_SUFFIXES = re.compile(self.TEMP_FILE_SUFFIXES, re.VERBOSE | re.IGNORECASE)
568 self.TEMP_DIR_SUFFIX = re.compile(self.TEMP_DIR_SUFFIX, re.VERBOSE)
569 self.SOURCE_FILE_PATTERN = re.compile(self.SOURCE_FILE_PATTERN, re.VERBOSE)
570 self.HYPERTEXT_FILE_PATTERN = re.compile(self.HYPERTEXT_FILE_PATTERN, re.VERBOSE)
571 self.OLD_EMAIL_ADDRESS = re.compile(self.OLD_EMAIL_ADDRESS, re.VERBOSE | re.IGNORECASE)
572 self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
573 self.LAST_UPDATED_LINE = re.compile(self.LAST_UPDATED_LINE, re.VERBOSE | re.IGNORECASE)
574
575# ----------------------------------------------------------------------------
576# Unit test individual functions.
577# ----------------------------------------------------------------------------
578
579class UnitTest(unittest.TestCase):
580 """Initialize the UnitTest class."""
581 def setUp(self):
582 self.user_settings = UserSettings()
583 self.user_settings.get_local_root_dir()
584
585 def tearDown(self):
586 """Clean up the UnitTest class."""
587 self.user_settings = None
588
589 def test_copyright_updating(self):
590 """Test copyright line updating to the current year."""
591 # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
592 line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor. All Rights Reserved. Copyright © 1999-2025 by Sean Erik O'Connor"
593 line_after_update_actual = "Copyright (C) 1999-2025 by Sean Erik O'Connor. All Rights Reserved. Copyright © 1999-2025 by Sean Erik O'Connor"
594 pat = self.user_settings.COPYRIGHT_LINE
595 match = pat.search(line_before_update)
596
597 if match:
598 old_year = int(match.group('old_year'))
599 # Same as call to self.get_current_year():
600 current_year = int(time.gmtime()[0])
601 if old_year < current_year:
602 # We matched and extracted the old copyright symbol into the variable
603 # 'symbol' using the pattern syntax (?P<symbol> \(C\) | ©)
604 # We now insert it back by placing the special syntax
605 # \g<symbol> into the replacement string.
606 new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(current_year) + " by Sean Erik"
607 line_after_update_computed = pat.sub(new_copyright, line_before_update)
608 self.assertEqual(
609 line_after_update_actual,
610 line_after_update_computed,
611 f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")
612 else:
613 print( "old_year >= current_year" )
614 self.fail()
615 else:
616 print( "no match for copyright pattern" )
617 self.fail()
618
619 def test_extract_filename_from_ftp_listing(self):
620 """Test parsing an FTP listing."""
621 ftp_line = "-rw-r--r-- 1 1000 1000 2901 Sep 26 17:12 allclasses-frame.html"
622 extracted_file_name = "allclasses-frame.html"
623 pat = self.user_settings.FTP_LISTING
624 match = pat.search(ftp_line)
625 if match:
626 filename = match.group('filename')
627 self.assertEqual(
628 filename,
629 extracted_file_name,
630 f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
631 else:
632 self.fail()
633
634 def test_get_file_time_and_date(self):
635 """Test getting a file time and date."""
636 # Point to an old file.
637 file_name = "./Images/home.png"
638 full_file_name = self.user_settings.local_root_dir + '/' + file_name
639 # Get the UTC time.
640 file_epoch_time = os.path.getmtime(full_file_name)
641 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
642 # Create a datetime object for the file.
643 d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]) # datetime class; year, month, day, hour, minute, seconds.
644 # Check if the file time matches what we would see if we did ls -l <file_name> and then converted to UTC.
645 computed = f"file {file_name:s} datetime {d.ctime():s}"
646 actual = "file ./Images/home.png datetime Tue Jul 1 03:53:16 2025"
647 self.assertEqual(computed, actual)
648
649 def test_set_file_time_and_date(self):
650 """Test setting a file time and date."""
651 file_name = "./Images/home.png"
652 full_file_name = self.user_settings.local_root_dir + '/' + file_name
653 # Create a temporary file in the same directory.
654 temp_file_name = "temporal.tmp"
655 full_temp_file_name = self.user_settings.local_root_dir + temp_file_name
656 try:
657 with open(full_temp_file_name, 'w') as fp:
658 fp.write("The End of Eternity")
659 except OSError as detail:
660 logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s} Aborting...")
661 raise UpdateWebException("Failed the unit test for setting time and date of a file. See the log file for details. Aborting...") from detail
662 # Get the old file time. Set the temporary file to the same time.
663 file_stat = os.stat(full_file_name)
664 os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
665 # What is the temporary file's time now?
666 file_epoch_time = os.path.getmtime(full_temp_file_name)
667 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
668 d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]) # datetime class; year, month, day, hour, minute, seconds.
669 # Is the temporary file time set properly?
670 computed = f"file {file_name:s} datetime {d.ctime():s}"
671 actual = "file ./Images/home.png datetime Tue Jul 1 03:53:16 2025"
672 self.assertEqual(computed, actual)
673 os.remove(full_temp_file_name)
674
675 def test_difference_of_time_and_date(self):
676 """Test a date difference calculation."""
677 file_name = "./Images/home.png"
678 full_file_name = self.user_settings.local_root_dir + '/' + file_name
679 # Get the UTC time.
680 file_epoch_time = os.path.getmtime(full_file_name)
681 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
682 # Create a datetime object for the file.
683 d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]) # datetime class; year, month, day, hour, minute, seconds.
684 # Slightly change the date and time by adding 1 minute.
685 d2 = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]+1) # year, month, day, hour, minute, second
686 time_delta = d2 - d
687 seconds_different = time_delta.total_seconds()
688 minutes_different = seconds_different / 60.0
689 hours_different = minutes_different / 60.0
690 days_different = hours_different / 24.0
691 computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
692 actual = "difference 0.00001 days, 0.00028 hours 0.01667 minutes, 1.00000 seconds"
693 self.assertEqual(computed, actual)
694
695 def test_pattern_match_dir_to_skip(self):
696 """Test if skipping certain named directories is recoginizing the dir names."""
697 dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
698 pat = re.compile(self.user_settings.DIR_TO_SKIP)
699 if pat.search(dir_skip):
700 self.assertTrue(True)
701 else:
702 self.assertTrue(False)
703
704 def test_file_name_to_syntax_highlight(self):
705 """Test if syntax highlighting recognizes file names to highlight."""
706 file_name1 = "Computer/hello.lsp"
707 file_name2 = "Computer/life.html"
708 p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
709 if p.search(Path(file_name1).name) and p.search(Path(file_name2).name):
710 self.assertTrue(True)
711 else:
712 self.assertTrue(False)
713
714 def test_user_settings(self):
715 """Test whether user settings are correctly initialized."""
716 computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
717 actual = "File size limit = 50000 K"
718 self.assertEqual(computed, actual, "File size limit settings are incorrect.")
719
720 def test_check_replace_substring(self,debug=True):
721 """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
722 For troubleshooting, turn on debug.
723 """
724 test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
725 # Iterate over all test strings.
726 for pair in test_verify_pairs:
727 [test_string, verify_string] = pair
728 if debug:
729 print( f">>>>>>> next test string = {test_string}")
730 print( f">>>>>>> next verify string = {verify_string}")
731 # Iterate over all patterns and replacements.
732 for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
733 [pat, rep_string] = match_replace_tuple
734 print( f"\t-------> next pattern = {pat}")
735 print( f"\t-------> next replacement = {rep_string}")
736 match = pat.search(test_string)
737 # The pattern match succeeds.
738 if match:
739 try:
740 sub = pat.sub(rep_string, test_string)
741 # String replacement succeeds for this pattern/replace pair iteration.
742 if debug:
743 print( f"\t\t.......> match and replace: {test_string} ---> {sub}")
744 test_string = sub
745 except IndexError as detail:
746 print(f"\t\t.......> Caught an exception: {str(detail):s}. Replacement failed.")
747 if debug:
748 self.assertTrue(False)
749 elif debug:
750 print( f"\t\t.......> no match for pattern = {pat} in test string = {test_string}")
751 # No match, so go on to the next pattern and don't change test_string.
752 # Done with all pattern/replace on test string.
753 # Check this test string in the list.
754 self.assertEqual(test_string, verify_string, f"\ntest_string = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
755 if debug:
756 print( f"\t******* DONE with all pattern matches and replacements on this test/verify string pair.\n" )
757
758# ----------------------------------------------------------------------------
759# Command line options.
760# ----------------------------------------------------------------------------
761
762class CommandLineSettings(object):
763 """Get the command line options."""
764
765 def __init__(self, user_settings, raw_args=None):
766 """Get command line options"""
767 command_line_parser = argparse.ArgumentParser(
768 description="updateweb options")
769
770 # Log all changes, not just warnings and errors.
771 command_line_parser.add_argument(
772 "-v",
773 "--verbose",
774 help="Turn on verbose mode to log everything",
775 action="store_true")
776
777 # Clean up the local website only.
778 command_line_parser.add_argument(
779 "-c",
780 "--clean",
781 help="Do a cleanup on the local web site only.",
782 action="store_true")
783
784 # Also upload MathJax.
785 command_line_parser.add_argument(
786 "-m",
787 "--mathjax",
788 help="""ALSO upload mathjax directory.\
789 Do this if you have a new version of MathJax or if have not yet created the /mathjax remote directory on the server.\
790 Recommend that you run the bash command: find . -name '*' -exec touch {} \\; This will ensure accurate times on the server.""",
791 action="store_true")
792
793 # Run unit tests only.
794 command_line_parser.add_argument("-t", "--test",
795 help="Run unit tests.",
796 action="store_true")
797
798 args = command_line_parser.parse_args(raw_args)
799
800 if args.verbose:
801 user_settings.VERBOSE = True
802 if args.clean:
803 user_settings.CLEAN = True
804 if args.test:
805 user_settings.UNITTEST = True
806 if args.mathjax:
807 user_settings.MATHJAX = True
808
809# ----------------------------------------------------------------------------
810# Base class which describes my web site overall.
811# ----------------------------------------------------------------------------
812
813class WebSite(object):
814 """
815 Abstract class used for analyzing both local and remote (ftp server) websites.
816 Contains the web-walking functions which traverse the directory structures and files.
817 These will be overloaded in the subclasses with differently specialized methods for either walking a disk drive directory with ls commands or else walking a remote directory with FTP commands.
818 Child classes may define additional functions which only they need.
819 """
820
821 def __init__(self, settings):
822 """Set up root directories"""
823
824 # Import the user settings.
825 self.user_settings = settings
826
827 # Queue keeps track of directories not yet processed.
828 self.queue = []
829
830 # List of all directories traversed.
831 self.directories = []
832
833 # List of files traversed, with file information.
834 self.files = []
835
836 # Find out the root directory and go there.
837 self.root_dir = self.get_root_dir()
838 self.go_to_root_dir(self.root_dir)
839
840 # This is a Python decorator which says get_current_year is a class function. And so there is no self first argument, and you can call it without creating an
841 # instance of this class. Call it from anywhere, inside or outside the class, using WebSite.get_current_year(). You could just create a global function instead.)
842 @staticmethod
843 def get_current_year():
844 """Get the current year."""
845 return int(time.gmtime()[0])
846
847 @staticmethod
848 def get_current_two_digit_year():
849 """Get the last two digits of the current year."""
850 return WebSite.get_current_year() % 100
851
852 @staticmethod
853 def is_file_info_type(file_info):
854 """Check if we have a file information structure or merely a simple file name."""
855 try:
856 if isinstance(file_info, list):
857 return True
858 elif isinstance(file_info, str):
859 return False
860 else:
861 logging.error("is_file_info_type found a bad type. Aborting...")
862 raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ")
863 except TypeError as detail:
864 logging.error(f"is_file_info_type found a bad type {str(detail):s}. Aborting...")
865 raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ") from detail
866
867 def get_root_dir(self):
868 """Subclass: Put code here to get the root directory"""
869 return ""
870
871 def go_to_root_dir(self, root_dir):
872 """Subclass: Put code here to go to the root directory"""
873 pass # Pythons's do-nothing statement.
874
875 def one_level_down(self, d):
876 """Subclass: Fill in with a method which returns a list of the
877 directories and files immediately beneath dir"""
878 return [], []
879
880 def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
881 """Walk a directory in either depth first or breadth first order. BFS is the default."""
882
883 # Get all subfiles and subdirectories off this node.
884 subdirectories, subfiles = self.one_level_down(d)
885
886 # Add all the subfiles in order.
887 for f in subfiles:
888
889 name = self.strip_root(f)
890 logging.debug(f"Webwalking: Adding file {name[self.user_settings.FILE_NAME]:s} to list.")
891
892 # Some files are private so skip them from consideration.
893 pat = re.compile(self.user_settings.FILE_TO_SKIP)
894
895 if pat.search(name[self.user_settings.FILE_NAME]):
896 logging.debug( f"Webwalking: Skipping private file {name[self.user_settings.FILE_NAME]:s}")
897 # Don't upload any *.log files either; we are currently writing to this file.
898 elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
899 logging.debug(f"Webwalking: Skipping log file {name[self.user_settings.FILE_NAME]:s}")
900 else:
901 # OK to add this file to the list for possible uploading.
902 self.files.append(name)
903
904 # Queue up the subdirectories.
905 for d in subdirectories:
906 # Some directories are private such as .git or just temporary file
907 # caches so skip them from consideration.
908 pat = re.compile(self.user_settings.DIR_TO_SKIP)
909 if pat.search(d):
910 logging.debug(f"Webwalking: Skipping private dir {d:s}")
911 else:
912 logging.debug(f"Webwalking: Pushing dir {d:s} on the queue.")
913 self.queue.append(d)
914
915 # Search through the directories.
916 while len(self.queue) > 0:
917 # For breadth first search, remove from beginning of queue.
918 if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
919 d = self.queue.pop(0)
920
921 # For depth first search, remove from end of queue.
922 elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
923 d = self.queue.pop()
924 else:
925 d = self.queue.pop(0)
926
927 name = self.strip_root(d)
928 logging.debug(f"Webwalking: Adding relative directory {name:s} to list, full path = {d:s}.")
929 self.directories.append(name)
930
931 self.walk(d)
932
933 def strip_root(self, file_info):
934 """Return a path, but strip off the root directory"""
935
936 root = self.root_dir
937
938 # Extract the file name.
939 if self.is_file_info_type(file_info):
940 name = file_info[self.user_settings.FILE_NAME]
941 else:
942 name = file_info
943
944 # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
945 # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
946 # Art/foo.txt
947 lenroot = len(root)
948 if root == self.user_settings.DEFAULT_ROOT_DIR:
949 pass
950 else:
951 lenroot = lenroot + 1
952
953 stripped_path = name[lenroot:]
954
955 if self.is_file_info_type(file_info):
956 # Update the file name only.
957 return [stripped_path,
958 file_info[self.user_settings.FILE_TYPE],
959 file_info[self.user_settings.FILE_DATE_TIME],
960 file_info[self.user_settings.FILE_SIZE]]
961 else:
962 return stripped_path
963
964 def append_root_dir(self, root_dir, name):
965 """Append the root directory to a path"""
966
967 # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
968 # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
969 if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
970 return root_dir + name
971 else:
972 return root_dir + "/" + name
973
974 def scan(self):
975 """Scan the directory tree recursively from the root"""
976 logging.debug(f"Webwalking: Beginning recursive directory scan from root directory {self.root_dir:s}")
977 self.walk(self.root_dir)
978
979 def modtime(self, f):
980 """Subclass: Get file modification time"""
981 pass
982
983 def finish(self):
984 """Quit web site"""
985 logging.debug(f"Finished with WebSite object of class {type(self)}")
986 pass
987
988# ----------------------------------------------------------------------------
989# Subclass which knows about the local web site on disk.
990# ----------------------------------------------------------------------------
991
992class LocalWebSite(WebSite):
993 """Walk the local web directory on local disk down from the root.
994 Clean up temporary files and do other cleanup work."""
995
996 def __init__(self, settings):
997 """Go to web page root and list all files and directories."""
998
999 # Initialize the parent class.
1000 WebSite.__init__(self, settings)
1001
1002 self.root_dir = self.get_root_dir()
1003 logging.debug(f"LocalWebSite.__init__(): \tRoot directory: {self.root_dir:s}")
1004
1005 def get_root_dir(self):
1006 """Get the name of the root directory"""
1007 return self.user_settings.local_root_dir
1008
1009 def go_to_root_dir(self, root_dir):
1010 """Go to the root directory"""
1011
1012 # Go to the root directory.
1013 logging.debug(f"LocalWebSite.go_to_root_dir(): \tchdir to root directory: {root_dir:s}")
1014 os.chdir(root_dir)
1015
1016 # Read it back.
1017 self.root_dir = os.getcwd()
1018 logging.debug(f"LocalWebSite.go_to_root_dir(): \tgetcwd root directory: {self.root_dir:s}")
1019
1020 def one_level_down(self, d):
1021 """List all files and subdirectories in the current directory, dir. For files, collect file info
1022 such as time, date and size."""
1023
1024 directories = []
1025 files = []
1026
1027 # Change to current directory.
1028 os.chdir(d)
1029
1030 # List all subdirectories and files.
1031 dir_list = os.listdir(d)
1032
1033 if dir_list:
1034 for line in dir_list:
1035 # Add the full path prefix from the root.
1036 name = self.append_root_dir(d, line)
1037 logging.debug(f"LocalWebSite.one_level_down(): \tlocal dir or file {name:s}")
1038
1039 # Is it a directory or a file?
1040 if os.path.isdir(name):
1041 directories.append(name)
1042 elif os.path.isfile(name):
1043 # First assemble the file information of name, time/date and size into a list.
1044 # Can index it like an array. For example,
1045 # file_info =
1046 # [ '/WebDesign/EquationImages/equation001.png', -- The file name.
1047 # 1, -- Enum type FileType.FILE = 1.
1048 # datetime.datetime(2010, 2, 3, 17, 15), -- UTC encoded in a date/time class.
1049 # 4675] -- File size in bytes.
1050 file_info = [name,
1051 FileType.FILE,
1052 self.get_file_date_time(name),
1053 self.get_file_size(name)]
1054 files.append(file_info)
1055
1056 # Sort the names into order.
1057 if directories:
1058 directories.sort()
1059 if files:
1060 files.sort()
1061
1062 return directories, files
1063
1064 @staticmethod
1065 def get_file_date_time(file_name):
1066 """Get a local file time and date in UTC."""
1067
1068 file_epoch_time = os.path.getmtime(file_name)
1069 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1070 # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1071 d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1072 return d
1073
1074 @staticmethod
1075 def get_file_size(file_name):
1076 """Get file size in bytes."""
1077 return os.path.getsize(file_name)
1078
1079 @staticmethod
1080 def clean_up_temp_file(temp_file_name, file_name, changed):
1081 """Remove the original file, rename the temporary file name to the original name.
1082 If there are no changes, just remove the temporary file.
1083 """
1084
1085 if changed:
1086 # Remove the old file now that we have the rewritten file.
1087 try:
1088 os.remove(file_name)
1089 logging.debug(f"Changes were made. Removed original file {file_name:s}")
1090 except OSError as detail:
1091 logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}. Need to remove it manually.")
1092
1093 # Rename the new file to the old file name.
1094 try:
1095 os.rename(temp_file_name, file_name)
1096 logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1097 except OSError as detail:
1098 logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}. Need to rename manually")
1099 else:
1100 # No changes? Remove the temporary file.
1101 try:
1102 os.remove(temp_file_name)
1103 logging.debug(f"No changes were made. Removed temporary file {temp_file_name:s}")
1104 except OSError as detail:
1105 logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}. Need to remove it manually.")
1106 return
1107
1108 @staticmethod
1109 def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1110 """
1111 Process each line of a file with a list of functions. Create a new temporary file.
1112
1113 The default list is None which means make an exact copy.
1114 """
1115
1116 # Assume no changes.
1117 changed = False
1118
1119 # Open both input and output files for processing. Check if we cannot do it.
1120 fin = None
1121 try:
1122 fin = open(in_file_name, "r")
1123 except IOError as detail:
1124 logging.error(f"process_lines_of_file(): \tCannot open file {in_file_name:s} for reading: {str(detail):s} Aborting...")
1125 if fin is not None:
1126 fin.close()
1127 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1128 fout = None
1129 try:
1130 fout = open(out_file_name, "w")
1131 except IOError as detail:
1132 logging.error(f"process_lines_of_file(): \tCannot open file {out_file_name:s} for writing: {str(detail):s} Aborting...")
1133 if fout is not None:
1134 fout.close()
1135 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1136
1137 # Read each line of the file, aborting if there is a read error.
1138 try:
1139 line = fin.readline()
1140
1141 # Rewrite the next line of the file using all the rewrite functions.
1142 while line:
1143 original_line = line
1144 # If we have one or more rewrite functions...
1145 if process_line_function_list is not None:
1146 # ...apply each rewrite functions to the line, one after the other in order.
1147 for processLineFunction in process_line_function_list:
1148 if processLineFunction is not None:
1149 line = processLineFunction(line)
1150
1151 if original_line != line:
1152 logging.debug(f"Rewrote the line: >>>{original_line:s}<<< into >>>{line:s}<<< for file {in_file_name:s}")
1153 changed = True
1154
1155 fout.write(line)
1156
1157 line = fin.readline()
1158
1159 fin.close()
1160 fout.close()
1161 except IOError as detail:
1162 logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s} Aborting...")
1163 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1164
1165 if changed:
1166 logging.debug(f"process_lines_of_file(): \tRewrote original file {in_file_name:s}."
1167 f"Changes are in temporary copy {out_file_name:s}")
1168
1169 # Return True if any lines were changed.
1170 return changed
1171
1172 def clean(self):
1173 """Scan through all directories and files in the local on disk website and clean them up."""
1174
1175 num_source_files_changed = 0
1176 num_source_files_syntax_highlighted = 0
1177
1178 logging.debug("Cleaning up the local web page.")
1179
1180 if self.directories is None or self.files is None:
1181 logging.error("Web site has no directories or files. Aborting...")
1182 raise UpdateWebException("Internal error for cleaning up the local web site. See the log file for details. Aborting... ")
1183
1184 for d in self.directories:
1185
1186 if self.is_temp_dir(d):
1187 # Add the full path prefix from the root.
1188 name = self.append_root_dir(self.get_root_dir(), d)
1189 try:
1190 logging.debug(f"Removing temp dir {self.root_dir:s} recursively")
1191 shutil.rmtree(name)
1192 except OSError as detail:
1193 logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1194
1195 for f in self.files:
1196 # Add the full path prefix from the root.
1197 full_file_name = self.append_root_dir(
1198 self.get_root_dir(), f[self.user_settings.FILE_NAME])
1199
1200 # Remove all temporary files.
1201 if self.is_temp_file(f):
1202 try:
1203 logging.debug(f"Removing temp file {full_file_name:s}")
1204 os.remove(full_file_name)
1205 except OSError as detail:
1206 logging.error(f"Cannot remove temp dir {full_file_name:s}: {str(detail):s}")
1207
1208 # Update source code files.
1209 if self.is_source_or_hypertext_file(f):
1210 changed = self.rewrite_source_file(full_file_name)
1211 if changed:
1212 num_source_files_changed += 1
1213 logging.debug(f"Rewrote source code file {self.root_dir:s}")
1214
1215 # Generate a syntax highlighted code listing.
1216 # Make it the same time and date as the original code. Then, only if there are recent changes, we will update the remote server.
1217 if self.is_file_to_syntax_highlight(f):
1218 # syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1219 syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name)
1220 if syntax_highlighted_file_name is not None:
1221 logging.debug(f"Generated a syntax highlighted source listing file {syntax_highlighted_file_name:s} for the file {full_file_name:s}")
1222 else:
1223 logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1224 num_source_files_syntax_highlighted += 1
1225
1226 logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1227 logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1228
1229 def is_temp_file(self, file_info):
1230 """Identify a file name as a temporary file"""
1231
1232 file_name = file_info[self.user_settings.FILE_NAME]
1233
1234 # Suffixes and names for temporary files be deleted.
1235 pat = self.user_settings.TEMP_FILE_SUFFIXES
1236 match = pat.search(file_name)
1237 # Remove any files containing twiddles anywhere in the name.
1238 if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1239 return True
1240
1241 return False
1242
1243 def is_temp_dir(self, dir_name):
1244 """Identify a name as a temporary directory."""
1245
1246 p = self.user_settings.TEMP_DIR_SUFFIX
1247 return p.search(dir_name)
1248
1249 def is_source_or_hypertext_file(self, file_info):
1250 """ Check if the file name is a source file or a hypertext file."""
1251
1252 file_name = file_info[self.user_settings.FILE_NAME]
1253 p1 = self.user_settings.SOURCE_FILE_PATTERN
1254 p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1255 if p1.search(file_name) or p2.search(file_name):
1256 return True
1257 else:
1258 return False
1259
1260 def is_file_to_syntax_highlight(self, file_info):
1261 """Check if this file type should have a syntax highlighted source listing."""
1262
1263 # Take apart the file name.
1264 full_file_name = file_info[self.user_settings.FILE_NAME]
1265 file_name = Path(full_file_name).name
1266
1267 p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1268 if p.search(file_name):
1269 return True
1270 else:
1271 return False
1272
1273 def rewrite_substring(self, line):
1274 """Rewrite a line containing a pattern of your choice"""
1275
1276 # Start with the original unchanged line.
1277 rewritten_line = line
1278
1279 # Do the replacements in order from first to last.
1280 for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1281 # Get the next pattern match replacement string tuple.
1282 [pat, rep_string] = match_replace_tuple
1283 # Does it match? Then do string substitution, else leave the line unchanged.
1284 match = pat.search(rewritten_line)
1285 if match:
1286 # Now we have these cases:
1287 # -No capture variables at all, but just a straightforward pattern match followed by a string substitution.
1288 # -One or more capture variable names in the pattern (?P<varname> ... ) along with the same corresponding match group names in replacement string \\g<varname> ...
1289 # If pat.sub() finds any inconsistency here such as the capture variable names not matching the group names, it will throw an exception.
1290 try:
1291 sub = pat.sub(rep_string, rewritten_line)
1292 rewritten_line = sub
1293 except IndexError as detail:
1294 logging.error(f"ERROR: {str(detail):s}. Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml. Did not rewrite the line |{rewritten_line:s}|")
1295
1296 return rewritten_line
1297
1298 def rewrite_email_address_line(self, line):
1299 """Rewrite lines containing old email addresses."""
1300
1301 # Search for the old email address.
1302 pat = self.user_settings.OLD_EMAIL_ADDRESS
1303 match = pat.search(line)
1304
1305 # Replace the old address with my new email address.
1306 if match:
1307 new_address = self.user_settings.NEW_EMAIL_ADDRESS
1308 sub = pat.sub(new_address, line)
1309 line = sub
1310
1311 return line
1312
1313 def rewrite_copyright_line(self, line):
1314 """Rewrite copyright lines if they are out of date."""
1315
1316 # Match the lines,
1317 # Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1318 # Copyright © nnnn-mmmm by Sean Erik O'Connor.
1319 # and pull out the old year and save it.
1320 pat = self.user_settings.COPYRIGHT_LINE
1321 match = pat.search(line)
1322
1323 # Found a match.
1324 if match:
1325 old_year = int(match.group('old_year'))
1326
1327 # Replace the old year with the current year.
1328 # We matched and extracted the old copyright symbol into the variable
1329 # 'symbol' using the pattern syntax (?P<symbol> \(C\) | ©)
1330 # We now insert it back by placing the special syntax \g<symbol>
1331 # into the replacement string.
1332 if old_year < WebSite.get_current_year():
1333 new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(WebSite.get_current_year()) + " by Sean Erik"
1334 sub = pat.sub(new_copyright, line)
1335 line = sub
1336 return line
1337
1338 def rewrite_last_update_line(self, line):
1339 """Rewrite the Last Updated line if the year is out of date."""
1340
1341 # Match the last updated line and pull out the year.
1342 # last updated 01 Jan 25.
1343 p = self.user_settings.LAST_UPDATED_LINE
1344 m = p.search(line)
1345
1346 if m:
1347 last_update_year = int(m.group('year'))
1348
1349 # Convert to four digit years.
1350 if last_update_year > 90:
1351 last_update_year += 1900
1352 else:
1353 last_update_year += 2000
1354
1355 # If the year is old, rewrite to "01 Jan <current year>".
1356 if last_update_year < WebSite.get_current_year():
1357 two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1358 sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1359 line = sub
1360
1361 return line
1362
1363 def rewrite_source_file(self, file_name):
1364 """Rewrite copyright lines, last updated lines, etc."""
1365 changed = False
1366
1367 # Create a new temporary file name for the rewritten file.
1368 temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1369
1370 # Apply changes to all lines of the temporary file. Apply change functions in
1371 # the sequence listed.
1372 if self.process_lines_of_file(file_name, temp_file_name,
1373 [self.rewrite_copyright_line,
1374 self.rewrite_last_update_line,
1375 self.rewrite_email_address_line,
1376 self.rewrite_substring]):
1377 logging.debug(f"Changed (rewritten) source file {file_name:s}")
1378 changed = True
1379
1380 # Rename the temporary file to the original file name. If no changes, just delete the temp file.
1381 self.clean_up_temp_file(temp_file_name, file_name, changed)
1382
1383 return changed
1384
1385 @staticmethod
1386 def create_syntax_highlighted_code_listing(source_file_name, **kwargs):
1387 """Create a syntax highlighted source listing for the file and return its name. Return None if there is an error.
1388 Keep the same date/time as the original file."""
1389
1390 # kwargs is a dictionary for key, value in kwargs.items():
1391 # for key, value in kwargs.items():
1392 # if key in kwargs:
1393 # print( f"kwargs:" )
1394 # print( f" key = |{key}|")
1395 # print( f" value = |{value}|" )
1396 dry_run_value = kwargs.get('dry_run')
1397 dry_run = False
1398 if dry_run_value is not None and dry_run_value is True:
1399 dry_run = True
1400
1401 # Take apart the file name.
1402 file_name_without_extension = Path(source_file_name).stem
1403 file_extension = Path(source_file_name).suffix
1404
1405 # Append *.html to the source code file name. This will be the syntax highlighted code listing.
1406 syntax_highlighted_file_name = f"{source_file_name}.html"
1407
1408 # In the special case of Jupyter notebooks, use the Jupyter to HTML converter.
1409 if file_extension == ".ipynb":
1410 if dry_run:
1411 logging.debug(f"Dry run only: don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1412 return None
1413 # Python manual recommends using the run() command instead of Popen(). See https://docs.python.org/3/library/subprocess.html#subprocess.run
1414 try:
1415 shell_command = f"jupyter nbconvert {source_file_name} --to html --output {syntax_highlighted_file_name}"
1416 # Throw an exception if we can't run the process.
1417 # Capture the standard output and standar error and dump to /dev/null so it doesn't print to the command line when running this script.
1418 # Since the shell command is a single string, use shell=True in the run() command.
1419 subprocess.run([shell_command],shell=True,check=True,stdout=subprocess.DEVNULL,stderr=subprocess.DEVNULL)
1420 except subprocess.CalledProcessError as detail:
1421 logging.error(f"Cannot convert the Jupyter file {source_file_name:s} to a syntax highlighted file: {str(detail):s} Aborting...")
1422 return None
1423 # Otherwise, use the Pygments syntax highlighter.
1424 else:
1425 # First choose the language lexer from the file name itself if there's no extension.
1426 # Dotted file names are treated as the entire file name.
1427 match file_name_without_extension:
1428 case "makefile":
1429 lexer = MakefileLexer()
1430 case ".bash_profile"|".bashrc"|".bash_logout":
1431 lexer = BashLexer()
1432 case ".vimrc":
1433 lexer = VimLexer()
1434 case ".gitignore_global" | ".gitignore" | ".gitconfig":
1435 lexer = OutputLexer() # No formatting.
1436 case _:
1437 # Choose the language lexer from the file extension. Web stuff first, then programming languages.
1438 match file_extension:
1439 case ".html":
1440 lexer = HtmlLexer()
1441 case ".css":
1442 lexer = CssLexer()
1443 case ".js":
1444 lexer = JavascriptLexer()
1445 case ".sh":
1446 lexer = BashLexer()
1447 case ".py":
1448 lexer = PythonLexer()
1449 case ".c" | ".h":
1450 lexer = CLexer()
1451 case ".hpp" | ".cpp":
1452 lexer = CppLexer()
1453 case ".lsp":
1454 lexer = CommonLispLexer()
1455 case ".for" | ".FOR" | ".f":
1456 lexer = FortranFixedLexer() # Fixed format FORTRAN, not FORTRAN 90.
1457 case ".txt" | ".dat": # Generic data file; no formatting.
1458 lexer = OutputLexer()
1459 case ".tex":
1460 lexer = TexLexer() # LaTeX, TeX, or related files.
1461 case ".m":
1462 lexer = MatlabLexer()
1463 case ".yaml":
1464 lexer = YamlLexer()
1465 case _:
1466 logging.error(f"Can't find a lexer for file {source_file_name}. Cannot generate a syntax highlighted source listing. Aborting...")
1467 return None
1468
1469 # Read the source code file into a single string.
1470 try:
1471 with open(source_file_name, 'r') as fp:
1472 source_file_string = fp.read()
1473 except OSError as detail:
1474 logging.error(f"Cannot read the source code file {source_file_name:s} for syntax highlighting: {str(detail):s} Aborting...")
1475
1476 # Top level Pygments function generates the HTML for the highlighted code.
1477 highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1478
1479 # The style sheet is always the same for all languages.
1480 style_sheet = HtmlFormatter().get_style_defs('.highlight')
1481
1482 # Write out the syntax colored file.
1483 if dry_run:
1484 logging.debug(f"Dry run only: don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1485 return None
1486 else:
1487 try:
1488 # Write out the highlighted code listing in HTML with CSS style sheet attached.
1489 with open(syntax_highlighted_file_name, 'w') as fp:
1490 fp.write(UserSettings.BASIC_HTML_BEGIN)
1491 fp.write(style_sheet)
1492 fp.write(UserSettings.BASIC_HTML_MIDDLE)
1493 fp.write(highlighted_html_source_file_string)
1494 fp.write(UserSettings.BASIC_HTML_END)
1495 except OSError as detail:
1496 logging.error(f"Cannot write the syntax highlighted file {syntax_highlighted_file_name:s}: {str(detail):s} Aborting...")
1497 # ------- end Pygments syntax highlighter
1498
1499 # Set the syntax highlighted code file to the same modification and access time and date as the source file.
1500 file_stat = os.stat(source_file_name)
1501 os.utime(syntax_highlighted_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1502
1503 # Are the original source and the syntax highlighted code the same data and time?
1504 dates_and_times_source_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1505 dates_and_times_syntax_highlighted_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1506 if dates_and_times_source_file_name != dates_and_times_syntax_highlighted_file_name:
1507 logging.error(f"Source code and syntax highlighted source don't have the same times. source time = {dates_and_times_source_file_name.ctime():s} syntax highlighted time = {dates_and_times_syntax_highlighted_file_name.ctime():s} Aborting...")
1508 return None
1509
1510 logging.debug(f"Generated a syntax highlighted listing {syntax_highlighted_file_name:s} for the source code file {source_file_name:s} with the same time and date = {dates_and_times_source_file_name.ctime():s}")
1511 return syntax_highlighted_file_name
1512
1513# ----------------------------------------------------------------------------
1514# Subclass which knows about the remote web site.
1515# ----------------------------------------------------------------------------
1516
1517class RemoteWebSite(WebSite):
1518 """Walk the remote web directory on a web server down from the root.
1519 Use FTP commands:
1520 https://en.wikipedia.org/wiki/List_of_FTP_commands
1521 Use the Python ftp library:
1522 https://docs.python.org/3/library/ftplib.html
1523 """
1524
1525 def __init__(self, user_settings):
1526 """Connect to FTP server and list all files and directories."""
1527
1528 # Root directory of FTP server.
1529 self.root_dir = user_settings.FTP_ROOT_NAME
1530 logging.debug(f"Set the remote web site ftp root dir = {self.root_dir:s}")
1531
1532 # Connect to FTP server and log in.
1533 try:
1534 # Turn on for troubleshooting ftp on the remote server.
1535 # self.ftp.set_debuglevel( 2 )
1536 # print( f"\nTrying ftp login to server name = {user_settings.SERVER_NAME} user name = {user_settings.USER_NAME} password = {user_settings.PASSWORD_NAME}\n")
1537 self.ftp = ftplib.FTP(user_settings.SERVER_NAME)
1538 self.ftp.login(user_settings.USER_NAME, user_settings.PASSWORD_NAME)
1539 # Catch all exceptions with the parent class Exception: all built-in,
1540 # non-system-exiting exceptions are derived from this class.
1541 except Exception as detail:
1542 # Extract the string message from the exception class with str().
1543 logging.error(f"Remote web site cannot login to ftp server: {str(detail):s} Aborting...")
1544 raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
1545 else:
1546 logging.debug("Remote web site ftp login succeeded.")
1547
1548 logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")
1549
1550 # Initialize the superclass.
1551 WebSite.__init__(self, user_settings)
1552
1553 def go_to_root_dir(self, root_dir):
1554 """Go to the root directory"""
1555
1556 try:
1557 # Go to the root directory.
1558 self.ftp.cwd(root_dir)
1559 logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")
1560
1561 # Read it back.
1562 self.root_dir = self.ftp.pwd()
1563 logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")
1564
1565 except Exception as detail:
1566 logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
1567 raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
1568
1569 def get_root_dir(self):
1570 """Get the root directory name"""
1571
1572 return self.root_dir
1573
1574 def finish(self):
1575 """Quit remote web site"""
1576 logging.debug(f"Finished with WebSite object of class {type(self)}")
1577 try:
1578 self.ftp.quit()
1579 except Exception as detail:
1580 logging.error(f"Cannot ftp quit: {str(detail):s}")
1581
1582 def one_level_down(self, d):
1583 """List files and directories in a subdirectory using ftp"""
1584
1585 directories = []
1586 files = []
1587
1588 try:
1589 # ftp listing from current dir.
1590 logging.debug(f"RemoteWebSite.one_level_down(): \tftp cwd: {d:s}")
1591 self.ftp.cwd(d)
1592 dir_list = []
1593
1594 # Use the nonstandard -a option in LIST to show all the hidden .* files.
1595 # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
1596 # Note the second argument requires a callback function.
1597 self.ftp.retrlines('LIST -a', dir_list.append)
1598
1599 except Exception as detail:
1600 logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}: {str(detail):s} Aborting...")
1601 raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
1602
1603 for line in dir_list:
1604 logging.debug(f"RemoteWebSite.one_level_down(): \tftp LIST: {line:s}")
1605
1606 # Line should at least have the minimum FTP information.
1607 if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
1608 # Parse the FTP LIST and put the pieces into file_info.
1609 file_info = self.parse_ftp_list(line)
1610 logging.debug(f"RemoteWebSite.one_level_down(): \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")
1611
1612 # Skip over the UNIX hidden files for current and parent directories . and .. Also skip over any NULL file names.
1613 if file_info[self.user_settings.FILE_NAME] == "" or file_info[self.user_settings.FILE_NAME] == "." or file_info[self.user_settings.FILE_NAME] == "..":
1614 logging.debug(f"RemoteWebSite.one_level_down(): \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
1615 pass
1616 # For a directory, prefix the full path prefix from the root to the directory name and add to the directory list.
1617 elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
1618 dirname = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1619 logging.debug(f"RemoteWebSite.one_level_down(): \tftp dir (full path): {dirname:s}")
1620 directories.append(dirname)
1621 # For a file: Add the full path prefix from the root to the file name.
1622 else:
1623 file_info[self.user_settings.FILE_NAME] = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1624 logging.debug(f"RemoteWebSite.one_level_down(): \tftp file (full path):\
1625 {file_info[self.user_settings.FILE_NAME]:s}")
1626 files.append(file_info)
1627 else:
1628 logging.error(f"RemoteWebSite.one_level_down(): \tFTP LIST line is too short: {line:s}")
1629
1630 directories.sort()
1631 files.sort()
1632
1633 return directories, files
1634
1635 def modtime(self, f):
1636 """Get the modification time of a file via ftp. Return 0 if ftp cannot get it."""
1637 modtime = 0
1638
1639 try:
1640 response = self.ftp.sendcmd('MDTM ' + f)
1641 # MDTM returns the last modified time of the file in the format
1642 # "213 YYYYMMDDhhmmss \r\n <error-response>
1643 # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59.
1644 # error-response is 550 for info not available, and 500 or 501 if command cannot
1645 # be parsed.
1646 if response[:3] == '213':
1647 modtime = response[4:]
1648 except ftplib.error_perm as detail:
1649 logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Aborting...")
1650 modtime = 0
1651
1652 return modtime
1653
1654 def parse_ftp_list(self, line):
1655 """Parse the ftp file listing and return file name, datetime and file size.
1656
1657 An FTP LIST command will give output which looks like this for a file:
1658
1659 -rw-r--r-- 1 1000 free 4084 Jul 18 16:55 sparkCoil.png
1660
1661 and for a directory:
1662
1663 drwxr-xr-x 2 1000 free 4096 Jul 18 16:36 ReadingList
1664
1665 FTP uses UTC for its listings; the conversion to local time is done by the OS.
1666 We can have problems on New Year's Eve. For example, the local file date/time is
1667
1668 Mon Jan 1 06:23:12 2018
1669
1670 But the remote file date/time from FTP listing doesn't show a year even though we
1671 know it was written to the server in 2017.
1672
1673 Mon Dec 31 03:02:00
1674
1675 So we default the remote file year to current year 2018 and get
1676
1677 Mon Dec 31 03:02:00 2018
1678
1679 Now we think that the remote file is newer by 363.860278 days.
1680 """
1681
1682 # Find out if we've a directory or a file.
1683 if line[0] == 'd':
1684 dir_or_file = FileType.DIRECTORY
1685 else:
1686 dir_or_file = FileType.FILE
1687
1688 pattern = self.user_settings.FTP_LISTING
1689
1690 # Sensible defaults.
1691 filesize = 0
1692 filename = ""
1693 # Default the time to midnight.
1694 hour = 0
1695 minute = 0
1696 seconds = 0
1697 # Default the date to Jan 1 of the current year.
1698 month = 1
1699 day = 1
1700 year = WebSite.get_current_year()
1701
1702 # Extract time and date from the ftp listing.
1703 match = pattern.search(line)
1704
1705 if match:
1706 filesize = int(match.group('bytes'))
1707 month = self.user_settings.monthToNumber[match.group('mon')]
1708 day = int(match.group('day'))
1709
1710 # Remote file listing contains the year. The FTP listing will omit the hour and minute.
1711 if match.group('year'):
1712 year = int(match.group('year'))
1713 logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
1714 else:
1715 # Remote file listing omits the year. Default the year to the current UTC time year.
1716 # That may be incorrect (see comments above).
1717 year = WebSite.get_current_year()
1718 logging.debug(f"ftp is missing the year; use the current year = {year}")
1719
1720 # If the FTP listing has the hour and minute, it will omit the year.
1721 if match.group('hour') and match.group('min'):
1722 hour = int(match.group('hour'))
1723 minute = int(match.group('min'))
1724 logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")
1725
1726 filename = match.group('filename')
1727
1728 # Package up the time and date nicely.
1729 # Note if we didn't get any matches, we'll default the remote date and
1730 # time to Jan 1 midnight of the current year.
1731 d = datetime.datetime(year, month, day, hour, minute, seconds)
1732
1733 return [filename, dir_or_file, d, filesize]
1734
1735# ----------------------------------------------------------------------------
1736# Class for synchronizing local and remote web sites.
1737# ----------------------------------------------------------------------------
1738
1739class UpdateWeb(object):
1740 """Given previously scanned local and remote directories, update the remote website."""
1741
1742 def __init__(
1743 self,
1744 user_settings,
1745 local_directory_list,
1746 local_file_info,
1747 remote_directory_list,
1748 remote_file_info):
1749 """Connect to remote site. Accept previously scanned local and remote files and directories."""
1750
1751 # Initialize from args.
1752 self.user_settings = user_settings
1753 self.local_directory_list = local_directory_list
1754 self.remote_directory_list = remote_directory_list
1755 self.local_file_info = local_file_info
1756 self.remote_file_info = remote_file_info
1757
1758 # Initialize defaults.
1759 self.local_files_list = []
1760 self.remote_files_list = []
1761 self.local_file_to_size = {}
1762 self.local_file_to_date_time = {}
1763 self.remote_file_to_date_time = {}
1764 self.local_only_dirs = []
1765 self.local_only_files = []
1766 self.remote_only_dirs = []
1767 self.remote_only_files = []
1768 self.common_files = []
1769
1770 # Connect to FTP server and log in.
1771 try:
1772 self.ftp = ftplib.FTP(self.user_settings.SERVER_NAME)
1773 self.ftp.login(self.user_settings.USER_NAME, self.user_settings.PASSWORD_NAME)
1774 except Exception as detail:
1775 logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
1776 raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
1777 else:
1778 logging.debug("ftp login succeeded.")
1779
1780 logging.debug(f"ftp server welcome message: {self.ftp.getwelcome():s}")
1781
1782 # Local root directory.
1783 self.local_root_dir = self.user_settings.local_root_dir
1784 logging.debug(f"Local root directory: {self.local_root_dir:s}")
1785
1786 # Root directory of FTP server.
1787 self.ftp_root_dir = self.user_settings.FTP_ROOT_NAME
1788 logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")
1789
1790 # Transform KB string to integer bytes. e.g. "200" => 2048000
1791 self.file_size_limit = int(self.user_settings.FILE_SIZE_LIMIT_NAME) * 1024
1792
1793 try:
1794 # Go to the root directory.
1795 self.ftp.cwd(self.ftp_root_dir)
1796
1797 # Read it back.
1798 self.ftp_root_dir = self.ftp.pwd()
1799 logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
1800 except Exception as detail:
1801 logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")
1802
1803 def append_root_dir(self, root_dir, name):
1804 """Append the root directory to a path"""
1805
1806 # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1807 # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1808 if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
1809 return root_dir + name
1810 else:
1811 return root_dir + "/" + name
1812
1813 def file_info(self):
1814 """Create lists of file names from the file information. Also create dictionaries which map file names onto
1815 dates, times, and sizes."""
1816
1817 # Extract file names.
1818 self.local_files_list = [
1819 file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
1820 self.remote_files_list = [
1821 file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]
1822
1823 # Use a dictionary comprehension to create key/value pairs,
1824 # (file name, file date/time)
1825 # which map file names onto date/time.
1826 self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
1827 self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}
1828
1829 # Dictionary comprehension creates a mapping of local file names onto file sizes.
1830 self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}
1831
1832 def update(self):
1833 """Scan through the local website, cleaning it up.
1834 Go to remote website on my servers and synchronize all files."""
1835
1836 self.file_info()
1837
1838 # Which files and directories are different.
1839 self.changes()
1840
1841 # Synchronize with the local web site.
1842 self.synchronize()
1843
1844 def changes(self):
1845 """Find the set of different directories and files on local and remote."""
1846
1847 # Add all directories which are only on local to the dictionary.
1848 dir_to_type = {
1849 d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}
1850
1851 # Scan through all remote directories, adding those only on remote or
1852 # on both.
1853 for d in self.remote_directory_list:
1854 if d in dir_to_type:
1855 dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1856 else:
1857 dir_to_type[d] = FileType.ON_REMOTE_ONLY
1858
1859 # Add all files which are only on local to the dictionary.
1860 file_to_type = {
1861 f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}
1862
1863 # Scan through all remote files, adding those only on remote or on
1864 # both.
1865 for f in self.remote_files_list:
1866 if f in file_to_type:
1867 file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1868 else:
1869 file_to_type[f] = FileType.ON_REMOTE_ONLY
1870
1871 logging.debug("Raw dictionary dump of directories")
1872 for k, v in dir_to_type.items():
1873 logging.debug(f"\t dir: {str(k):s} type: {str(v):s}")
1874
1875 logging.debug("Raw dictionary dump of files")
1876 for k, v in file_to_type.items():
1877 logging.debug(f"\t file: {str(k):s} type: {str(v):s}")
1878
1879 # List of directories only on local. Keep the ordering.
1880 self.local_only_dirs = [
1881 d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]
1882
1883 # List of directories only on remote. Keep the ordering.
1884 self.remote_only_dirs = [
1885 d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]
1886
1887 # We don't care about common directories, only their changed files, if
1888 # any.
1889
1890 # List of files only on local. Keep the ordering.
1891 self.local_only_files = [
1892 f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]
1893
1894 # List of files only on remote. Keep the ordering.
1895 self.remote_only_files = [
1896 f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]
1897
1898 # List of common files on both local and remote. Keep the ordering.
1899 self.common_files = [
1900 f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]
1901
1902 logging.debug("*** Directories only on local ******************************")
1903 for d in self.local_only_dirs:
1904 logging.debug(f"\t {d:s}")
1905
1906 logging.debug("*** Directories only on remote ******************************")
1907 for d in self.remote_only_dirs:
1908 logging.debug(f"\t {d:s}")
1909
1910 logging.debug("*** Files only on local ******************************")
1911 for f in self.local_only_files:
1912 logging.debug(f"\t {f:s}")
1913
1914 logging.debug("*** Files only on remote ******************************")
1915 for f in self.remote_only_files:
1916 logging.debug(f"\t {f:s}")
1917
1918 logging.debug("*** Common files ******************************")
1919 for f in self.common_files:
1920 logging.debug(f"name {f:s}")
1921 logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
1922 logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")
1923
1924 def synchronize(self):
1925 """Synchronize files and subdirectories in the remote directory with the local directory."""
1926
1927 # If we have the same files in local and remote, compare their times
1928 # and dates.
1929 for f in self.common_files:
1930 local_file_time = self.local_file_to_date_time[f]
1931 remote_file_time = self.remote_file_to_date_time[f]
1932
1933 # What's the time difference?
1934 time_delta = remote_file_time - local_file_time
1935 # How much difference, either earlier or later?
1936 seconds_different = abs(time_delta.total_seconds())
1937 minutes_different = seconds_different / 60.0
1938 hours_different = minutes_different / 60.0
1939 days_different = hours_different / 24.0
1940
1941 # Assume no upload initially.
1942 upload_to_host = False
1943
1944 logging.debug(f"Common file: {f:s}.")
1945
1946 # Remote file time is newer.
1947 # Allow 200 characters
1948 # Mathematics/AbstractAlgebra/PrimitivePolynomials/Project/Build/PrimpolyXCode/Primpoly/Primpoly.xcodeproj/project.xcworkspace/xcuserdata/seanoconnor.xcuserdatad/UserInterfaceState.xcuserstate
1949
1950 if remote_file_time > local_file_time:
1951 # Remote file time is MUCH newer: suspect time is out of joint on the server, so upload local local file to be safe.
1952 if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
1953 logging.error(f"Remote file {f:s} is MUCH newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes]. Upload the file to be safe.")
1954 logging.error(f"\tlocal time {local_file_time.ctime():s}")
1955 logging.error(f"\tremote time {remote_file_time.ctime():s}")
1956
1957 # Set the local file to the current time.
1958 full_file_name = self.append_root_dir(
1959 self.local_root_dir, f)
1960 if os.path.exists(full_file_name):
1961 # Change the access and modify times of the file to the current time.
1962 os.utime(full_file_name, None)
1963 logging.error(f"Touching local file {full_file_name:s} to make it the current time")
1964
1965 upload_to_host = True
1966 # Remote file time is newer, but not by much. Let's just assume a slight time mismatch on the server. Don't upload.
1967 else:
1968 logging.warning(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
1969 logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1970 logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1971 upload_to_host = False
1972
1973 # Local file time is newer.
1974 elif local_file_time > remote_file_time:
1975 # Local file time slightly newer than the remote file. So we are pretty sure the local file really got changed vs the server file.
1976 if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
1977 logging.warning(f"Local file {f:20s} is SLIGHTLY newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD} minutes]. Uploading to remote server.")
1978 logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1979 logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1980 upload_to_host = True
1981 else:
1982 logging.warning(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
1983 logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1984 logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1985 upload_to_host = False
1986
1987 # Cancel the upload if the file is too big for the server.
1988 size = self.local_file_to_size[f]
1989 if size >= self.file_size_limit:
1990 logging.error(f"upload(): Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
1991 upload_to_host = False
1992
1993 # Finally do the file upload.
1994 if upload_to_host:
1995 logging.debug(f"Uploading changed file {f:s}")
1996 # Suppress newline to keep the message to the console more compact. Flush output buffer, so we can see the message right away.
1997 print(f"Uploading changed file {f:s}... ", end='', flush=True)
1998 self.upload(f)
1999
2000 # Remote directory is not in local. Delete it.
2001 for d in self.remote_only_dirs:
2002 logging.debug(f"Deleting remote only directory {d:s}")
2003 print(f"Deleting remote only directory {d:s}... ", end='', flush=True)
2004 self.rmdir(d)
2005
2006 # Local directory missing on remote. Create it.
2007 # Due to breadth first order scan, we'll create parent directories
2008 # before child directories.
2009 for d in self.local_only_dirs:
2010 logging.debug(f"Only on local. Creating new remote dir {d:s}.")
2011 print(f"Creating new remote directory {d:s}... ", end='', flush=True)
2012 self.mkdir(d)
2013
2014 # Local file missing on remote. Upload it.
2015 for f in self.local_only_files:
2016 logging.debug(f"Local only file. Uploading {f:s} to remote.")
2017
2018 # But cancel the upload if the file is too big for the server.
2019 size = self.local_file_to_size[f]
2020 if size >= self.file_size_limit:
2021 logging.error(f"upload(): Skipping upload of file {f:s} of size {size:d};"
2022 f" too large for server, limit is {self.file_size_limit:d} bytes")
2023 else:
2024 logging.debug(f"Uploading new file {f:s}")
2025 print(f"Uploading new file {f:s}... ", end='', flush=True)
2026 self.upload(f)
2027
2028 # Remote contains a file not present on the local. Delete the file.
2029 for f in self.remote_only_files:
2030 logging.debug(f"Remote only file. Deleting remote file {f:s}.")
2031 print(f"Deleting remote file {f:s}... ", end='', flush=True)
2032 self.del_remote(f)
2033
2034 def del_remote(self, relative_file_path):
2035 """Delete a file using ftp."""
2036
2037 logging.debug(f"del_remote(): \trelative file path name: {relative_file_path:s}")
2038
2039 # Parse the relative file path into file name and relative directory.
2040 relative_dir, file_name = os.path.split(relative_file_path)
2041 logging.debug(f"del_remote(): \tfile name: {file_name:s}")
2042 logging.debug(f"del_remote(): \trelative dir: {relative_dir:s}")
2043 logging.debug(f"del_remote(): \tremote root dir: {self.ftp_root_dir:s}")
2044
2045 try:
2046 # Add the remote root path and go to the remote directory.
2047 remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2048 logging.debug(f"del_remote(): \tftp cd remote dir: {remote_dir:s}")
2049 self.ftp.cwd(remote_dir)
2050 except Exception as detail:
2051 logging.error(f"del_remote(): \tCannot ftp chdir: {str(detail):s} Skipping...")
2052 else:
2053 try:
2054 logging.debug(f"del_remote(): \tftp rm: {file_name:s}")
2055
2056 # Don't remove zero length file names.
2057 if len(file_name) > 0:
2058 self.ftp.delete(file_name)
2059 else:
2060 logging.warning( "fdel_remote(): skipping ftp delete; file NAME {file_name:s} had zero length")
2061 except Exception as detail:
2062 logging.error(f"del_remote(): \tCannot ftp rm: {str(detail):s}")
2063
2064 def mkdir(self, relative_dir):
2065 """Create new remote directory using ftp."""
2066
2067 logging.debug(f"mkdir(): \trelative dir path name: {relative_dir:s}")
2068 logging.debug(f"mkdir(): \tremote root dir: {self.ftp_root_dir:s}")
2069
2070 # Parse the relative dir path into prefix dir and suffix dir.
2071 path, d = os.path.split(relative_dir)
2072 logging.debug(f"mkdir(): \tremote prefix dir: {path:s}")
2073 logging.debug(f"mkdir(): \tremote dir: {d:s}")
2074
2075 try:
2076 # Add the remote root path and go to the remote directory.
2077 remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2078 logging.debug(f"mkdir(): \tftp cd remote dir: {remote_dir:s}")
2079 self.ftp.cwd(remote_dir)
2080 except Exception as detail:
2081 logging.error(f"mkdir(): \tCannot ftp chrdir: {str(detail):s} Skipping...")
2082 else:
2083 try:
2084 logging.debug(f"mkdir(): \tftp mkd: {d:s}")
2085 self.ftp.mkd(d)
2086 except Exception as detail:
2087 logging.error(f"mkdir(): \tCannot ftp mkdir: {str(detail):s}")
2088
2089 def rmdir(self, relative_dir):
2090 """Delete an empty directory using ftp."""
2091
2092 logging.debug(f"rmdir(): \tintermediate dir path name: {relative_dir:s}")
2093 logging.debug(f"rmdir(): \tremote root dir: {self.ftp_root_dir:s}")
2094
2095 # Parse the relative dir path into prefix dir and suffix dir.
2096 path, d = os.path.split(relative_dir)
2097 logging.debug(f"rmdir(): \tremote prefix dir: {path:s}")
2098 logging.debug(f"rmdir(): \tremote dir: {d:s}")
2099
2100 try:
2101 # Add the remote root path and go to the remote directory.
2102 remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2103 logging.debug(f"rmdir(): \tftp cd remote dir: {remote_dir:s}")
2104 self.ftp.cwd(remote_dir)
2105 except Exception as detail:
2106 logging.error(f"rmdir(): \tCannot ftp chdir: {str(detail):s} Skipping...")
2107 else:
2108 try:
2109 logging.debug(f"rmdir(): \tftp rmd: {d:s}")
2110 self.ftp.rmd(d)
2111 except Exception as detail:
2112 logging.error(f"rmdir(): \tCannot ftp rmdir dir {d:s}: {str(detail):s}. Directory is probably not empty. Do a manual delete.")
2113
2114 def download(self, relative_file_path):
2115 """Download a binary file using ftp."""
2116
2117 logging.debug(f"download(): \tfile name: {relative_file_path:s}")
2118
2119 # Parse the relative file path into file name and relative directory.
2120 relative_dir, file_name = os.path.split(relative_file_path)
2121 logging.debug(f"download(): \tfile name: {file_name:s}")
2122 logging.debug(f"download(): \trelative dir: {relative_dir:s}")
2123 logging.debug(f"download(): \troot dir: {self.ftp_root_dir:s}")
2124
2125 # Add the remote root path and go to the remote directory.
2126 remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2127 logging.debug(f"download(): \tftp cd remote dir: {remote_dir:s}")
2128
2129 try:
2130 self.ftp.cwd(remote_dir)
2131 except Exception as detail:
2132 logging.error(f"download(): \tCannot ftp chdir: {str(detail):s} Skipping...")
2133 else:
2134 # Add the local root path to get the local file name.
2135 # Open local binary file to write into.
2136 local_file_name = self.append_root_dir(
2137 self.local_root_dir, relative_file_path)
2138 logging.debug(f"download(): \topen local file name: {local_file_name:s}")
2139 try:
2140 f = open(local_file_name, "wb")
2141 try:
2142 # Calls f.write() on each block of the binary file.
2143 # ftp.retrbinary( "RETR " + file_name, f.write )
2144 pass
2145 except Exception as detail:
2146 logging.error(f"download(): \tCannot cannot ftp retrbinary: {str(detail):s}")
2147 f.close()
2148 except IOError as detail:
2149 logging.error(f"download(): \tCannot open local file {local_file_name:s} for reading: {str(detail):s}")
2150
2151 def upload(self, relative_file_path):
2152 """Upload a binary file using ftp."""
2153
2154 logging.debug(f"upload(): \trelative file path name: {relative_file_path:s}")
2155
2156 # Parse the relative file path into file name and relative directory.
2157 relative_dir, file_name = os.path.split(relative_file_path)
2158 logging.debug(f"upload(): \tfile name: {file_name:s}")
2159 logging.debug(f"upload(): \trelative dir: {relative_dir:s}")
2160 logging.debug(f"upload(): \tremote root dir: {self.ftp_root_dir:s}")
2161
2162 # Add the remote root path and go to the remote directory.
2163 remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2164 logging.debug(f"upload(): \tftp cd remote dir: {remote_dir:s}")
2165
2166 try:
2167 self.ftp.cwd(remote_dir)
2168 except Exception as detail:
2169 logging.error(f"upload(): \tCannot ftp chdir: {str(detail):s} Skipping...")
2170 else:
2171 # Add the local root path to get the local file name.
2172 # Open local binary file to read from.
2173 local_file_name = self.append_root_dir(
2174 self.local_root_dir, relative_file_path)
2175 logging.debug(f"upload(): \topen local file name: {local_file_name:s}")
2176
2177 try:
2178 f = open(local_file_name, "rb")
2179 try:
2180 # f.read() is called on each block of the binary file until
2181 # EOF.
2182 logging.debug(f"upload(): \tftp STOR file {file_name:s}")
2183 self.ftp.storbinary("STOR " + file_name, f)
2184 except Exception as detail:
2185 logging.error(f"upload(): \tCannot ftp storbinary: {str(detail):s}")
2186 f.close()
2187 except IOError as detail:
2188 logging.error(f"upload(): \tCannot open local file {local_file_name:s} for reading: {str(detail):s}")
2189
2190 def finish(self):
2191 """Log out of an ftp session"""
2192 logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
2193 try:
2194 self.ftp.quit()
2195 except Exception as detail:
2196 logging.error(f"Cannot ftp quit because {str(detail):s}")
2197
2198# ----------------------------------------------------------------------------
2199# Main function
2200# ----------------------------------------------------------------------------
2201
2202def main(raw_args=None):
2203 """Main program. Clean up and update my website."""
2204
2205 # Print the obligatory legal notice.
2206 print("""
2207 updateweb Version 7.3 - A Python utility program which maintains my web site.
2208 Copyright (C) 2007-2025 by Sean Erik O'Connor. All Rights Reserved.
2209
2210 It deletes temporary files, rewrites old copyright lines and email address
2211 lines in source files, then synchronizes all changes to my web sites.
2212
2213 updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
2214 GNU General Public License. This is free software, and you are welcome
2215 to redistribute it under certain conditions; see the GNU General Public
2216 License for details.
2217 """)
2218
2219 # Put ALL the main code into a try block!
2220 try:
2221 # ---------------------------------------------------------------------
2222 # Load default settings and start logging.
2223 # ---------------------------------------------------------------------
2224
2225 # Default user settings.
2226 user_settings = UserSettings()
2227
2228 print( f"Running main( {raw_args} ) Python version\
2229 {sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d}\
2230 local web directory\
2231 {user_settings.local_root_dir}\n")
2232 # Get command line options such as --verbose. Pass them back as flags in
2233 # user_settings.
2234 CommandLineSettings(user_settings, raw_args)
2235
2236 # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
2237 if user_settings.UNITTEST:
2238 suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
2239 unittest.TextTestRunner(verbosity=2).run(suite)
2240 # We are done!
2241 print(" ...done!", flush=True)
2242 return
2243
2244 # Start logging to file.
2245 if user_settings.VERBOSE:
2246 # Turn on logging for DEBUG and higher: DEBUG, INFO, WARNING, ERROR, CRITICAL messages.
2247 loglevel = logging.DEBUG
2248 else:
2249 # Turn on logging for WARNING and higher: WARNING, ERROR and CRITICAL messages.
2250 loglevel = logging.WARNING
2251
2252 # Pick the log file name on the host.
2253 if user_settings.CLEAN:
2254 user_settings.LOGFILENAME = "/private/logLocal.txt"
2255 else:
2256 user_settings.LOGFILENAME = "/private/logRemote.txt"
2257
2258 # Default is to skip processing or uploading MathJax files in /mathjax to the server.
2259 if not user_settings.MATHJAX:
2260 user_settings.DIR_TO_SKIP += "|mathjax"
2261 else:
2262 mathJaxPostUploadingAdvice = \
2263 [ "Processing and uploading new or changed mathjax files.",
2264 "If you are loading MathJax for the first time --- don't forget to upload the file .htaccess manually using FileZilla or another ftp client... ",
2265 "FTP won't delete remote dir which are nonempty. You might have to run this program several times to delete all subdirectories before the parent dir can be deleted. Or you can manually delete with FTP.",
2266 "If using FileZilla for manual deletion, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box. This avoids ERROR 421 Too many connections... ",
2267 "\n"
2268 ]
2269 print( mathJaxPostUploadingAdvice[0], flush=True)
2270 print( mathJaxPostUploadingAdvice[1], flush=True)
2271 print( mathJaxPostUploadingAdvice[2], flush=True)
2272 print( mathJaxPostUploadingAdvice[3], flush=True)
2273 print( mathJaxPostUploadingAdvice[4], flush=True)
2274 logging.debug( mathJaxPostUploadingAdvice[0], flush=True)
2275 logging.debug( mathJaxPostUploadingAdvice[1], flush=True)
2276 logging.debug( mathJaxPostUploadingAdvice[2], flush=True)
2277 logging.debug( mathJaxPostUploadingAdvice[3], flush=True)
2278 logging.debug( mathJaxPostUploadingAdvice[4], flush=True)
2279
2280 # Configure the logging and start it.
2281 logging.basicConfig( level=loglevel, format='%(asctime)s %(levelname)-8s %(message)s', datefmt='%a, %d %b %Y %H:%M:%S', filename=user_settings.local_root_dir + user_settings.LOGFILENAME, filemode='w')
2282 logging.debug("********** Begin logging")
2283
2284 # ---------------------------------------------------------------------
2285 # Scan the local website, finding out all files and directories.
2286 # ---------------------------------------------------------------------
2287
2288 # Suppress newline to keep the message to the console more compact. Flush output buffer, so we can see the message right away.
2289 print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}... ", end='', flush=True)
2290 logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")
2291
2292 local = LocalWebSite(user_settings)
2293 local.scan()
2294
2295 # ---------------------------------------------------------------------
2296 # Clean up local website.
2297 # ---------------------------------------------------------------------
2298
2299 # Clean up the directory by rewriting source code and hypertext and removing temporary files.
2300 print("Cleaning local web site... ", end='', flush=True)
2301 logging.debug("========================== Cleaning the local web site")
2302 local.clean()
2303
2304 # We are done with the first scan of the local web site and will dispose of it.
2305 local.finish()
2306 del local
2307
2308 # ---------------------------------------------------------------------
2309 # Rescan the local website since there will be changes to source
2310 # files from the clean up stage.
2311 # ---------------------------------------------------------------------
2312
2313 print(f"Rescan the local web site from root dir = {user_settings.local_root_dir}... ", end='', flush=True)
2314 logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")
2315
2316 local = LocalWebSite(user_settings)
2317
2318 local.scan()
2319
2320 # ---------------------------------------------------------------------
2321 # List all the local directories and files and their sizes.
2322 # ---------------------------------------------------------------------
2323
2324 # Local website directories.
2325 local_directory_list = local.directories
2326 logging.debug("********** List of all the Local Directories")
2327 for d in local_directory_list:
2328 logging.debug(f"\t {d:s}")
2329
2330 # Generate lists of the local website filenames only, and their sizes in byteskjjjj
2331 local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
2332 total_number_of_files = len( local_files_name_size_pairs )
2333 logging.debug(f"********** List of all the Local Files from largest to smallest. There are {total_number_of_files:15d} files.")
2334 local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)
2335
2336 # Local website filenames only, and their dates and times.
2337 local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME],file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
2338 logging.debug(f"********** List of all Local Files Showing Their Date and Time")
2339 for file_datetime_pair in local_file_datetime_pairs:
2340 logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")
2341
2342 # Total number of bytes in the local files.
2343 total_number_of_bytes = 0
2344 for file_size_pair in local_files_name_size_pairs:
2345 logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
2346 total_number_of_bytes += file_size_pair[1]
2347 logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")
2348
2349 local.finish()
2350
2351 if user_settings.CLEAN:
2352 logging.debug("========================== Done with local file and directory cleanup...")
2353 del local
2354 print("...done!", flush=True)
2355 return
2356
2357 # ---------------------------------------------------------------------
2358 # Scan the remote hosted web site.
2359 # ---------------------------------------------------------------------
2360
2361 print("Scanning remote web site... ", end='', flush=True)
2362 logging.debug("========================== Scanning the remote web site...")
2363
2364 # Pick which website to update.
2365 logging.debug("Connecting to primary remote site.")
2366 remote = RemoteWebSite(user_settings)
2367 remote.scan()
2368 remote.finish()
2369
2370 # ---------------------------------------------------------------------
2371 # List all the remote server directories and files and their sizes.
2372 # ---------------------------------------------------------------------
2373
2374 remote_directory_list = remote.directories
2375 logging.debug("********** Remote Directories")
2376 for d in remote_directory_list:
2377 logging.debug(f"\t {d:s}")
2378
2379 # Local website filenames only, and their sizes in bytes.
2380 remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
2381 total_number_of_files = len( remote_files_name_size_list )
2382 logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
2383 remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
2384 total_number_of_bytes = 0
2385 for file_size in remote_files_name_size_list:
2386 logging.debug(f"\t {file_size[1]:10d} bytes {file_size[0]:s}")
2387 total_number_of_bytes += file_size[1]
2388 logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")
2389
2390 # ---------------------------------------------------------------------
2391 # Synchronize the local and remote web sites.
2392 # ---------------------------------------------------------------------
2393
2394 print("Synchronizing remote and local web sites... ", end='', flush=True)
2395 logging.debug("========================= Synchronizing remote and local web sites...")
2396
2397 # Primary website.
2398 logging.debug("Connecting to primary remote site for synchronization.")
2399 sync = UpdateWeb(user_settings,
2400 local.directories,
2401 local.files,
2402 remote.directories,
2403 remote.files)
2404
2405 sync.update()
2406 sync.finish()
2407
2408 del sync
2409 del remote
2410 del local
2411 print("...done!", flush=True)
2412
2413 except UpdateWebException as detail:
2414 logging.error(f"Couldn't update the web directory: {str(detail):s}. Aborting...")
2415
2416 except RecursionError as detail:
2417 logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d} You can increase it with sys.setrecursionlimit(limit) {str(detail):s}. Aborting...")
2418
2419if __name__ == '__main__':
2420 """Python executes all code in this file. Finally, we come here.
2421
2422 * If we are executing this file as a standalone Python script,
2423 the name of the current module is set to __main__ and thus we'll call the main() function.
2424
2425 * But if we are importing this code as a module, and calling it from another script, we will do this instead:
2426
2427 import updateweb
2428 updateweb.main(["--test"])"""
2429
2430 main()