1#!/usr/bin/env python3
2# ============================================================================
3#
4# NAME
5#
6# updateweb.py
7#
8# DESCRIPTION
9#
10# Python script which updates my web sites.
11#
12# It does miscellaneous cleanup on my local copy of the web site on disk,
13# including updating copyright information, then synchronizes the local
14# copy to my remote server web sites using FTP.
15#
16# USAGE
17#
18# It's best to use the associated makefile.
19# But you can call this Python utility from the command line,
20#
21# $ python updateweb.py Clean up my local copy, then use it
22# to update my remote web server site.
23# Log warnings and errors.
24# $ python updateweb.py -v Same, but log debug messages also.
25# $ python updateweb.py -c Clean up my local copy only.
26# $ python updateweb.py -t Run unit tests only.
27# $ python updateweb.py -m Upload MathJax files (only need to do this once).
28#
29# We get username and password information from the file PARAMETERS_FILE.
30#
31# Logs are written to the files,
32#
33# logLocal.txt Local web site cleanup log.
34# logRemote.txt Remote web server update log.
35#
36# AUTHOR
37#
38# Sean E. O'Connor 23 Aug 2007 Version 1.0 released.
39#
40# LEGAL
41#
42# updateweb.py Version 7.1 - A Python utility program which maintains my web site.
43# Copyright (C) 2007-2024 by Sean Erik O'Connor. All Rights Reserved.
44#
45# This program is free software: you can redistribute it and/or modify
46# it under the terms of the GNU General Public License as published by
47# the Free Software Foundation, either version 3 of the License, or
48# (at your option) any later version.
49#
50# This program is distributed in the hope that it will be useful,
51# but WITHOUT ANY WARRANTY; without even the implied warranty of
52# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
53# GNU General Public License for more details.
54#
55# You should have received a copy of the GNU General Public License
56# along with this program. If not, see <http://www.gnu.org/licenses/>.
57#
58# The author's address is seanerikoconnor!AT!gmail!DOT!com
59# with !DOT! replaced by . and the !AT! replaced by @
60#
61# NOTES
62#
63# DOCUMENTATION
64#
65# Python interpreter: https://www.python.org/
66# Python tutorial and reference: https://docs.python.org/lib/lib.html
67# Python debugger: https://docs.python.org/3/library/pdb.html
68# Python regular expression howto: https://docs.python.org/3.7/howto/regex.html
69#
70# ============================================================================
71
72# ----------------------------------------------------------------------------
73# Load Python Packages
74# ----------------------------------------------------------------------------
75
76# OS stuff
77import sys
78import os
79import argparse
80import shutil
81from pathlib import Path
82
83# Regular expressions
84import re
85
86# FTP stuff
87import ftplib
88
89# Date and time
90import time
91import stat
92import datetime
93
94# Logging
95import logging
96
97# Unit testing
98import unittest
99
100# Enumerated types (v3.4)
101from enum import Enum
102from typing import List, Any
103
104# YAML configuration files (a superset of JSON!)
105import yaml
106# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
107try:
108 from yaml import CLoader as Loader
109except ImportError:
110 from yaml import Loader
111
112# Python syntax highlighter. See https://pygments.org
113from pygments import highlight
114from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
115from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
116from pygments.formatters import HtmlFormatter
117
118
119# ----------------------------------------------------------------------------
120# Custom Top Level Exceptions.
121# ----------------------------------------------------------------------------
122
class UpdateWebException(Exception):
    """Top-level application exception for unrecoverable failures.

    Raised when something goes wrong at a deep level while searching local
    files, searching remote files, or trying to synchronize local and remote
    copies, and we cannot recover.  Derived from Exception, as the Python
    manual recommends for application-defined exceptions.
    """
127
128# ----------------------------------------------------------------------------
129# User settings.
130# ----------------------------------------------------------------------------
131
class TreeWalkSettings(Enum):
    """Strategy for walking the directory tree.

    The member values are stable integers; other code may compare or store
    them, so they must not change.
    """
    BREADTH_FIRST_SEARCH = 1  # Visit all entries at one level before descending.
    DEPTH_FIRST_SEARCH = 2    # Descend fully into each subdirectory first.
136
class FileType(Enum):
    """Classification of a directory entry and of where it resides.

    Used in the file_info records; the integer values are stable and must
    not change.
    """
    DIRECTORY = 0                 # Entry is a directory.
    FILE = 1                      # Entry is a regular file.
    ON_LOCAL_ONLY = 2             # Present in the local copy only.
    ON_REMOTE_ONLY = 3            # Present on the remote server only.
    ON_BOTH_LOCAL_AND_REMOTE = 4  # Present on both local and remote sides.
144
class UserSettings:
    """Megatons of user selectable settings.

    Groups together:
      * run-time flags (verbose/clean/unittest/mathjax),
      * private server account settings loaded from the YAML parameter file,
      * the regular expressions used to clean, classify and update web files,
      * the HTML boilerplate wrapped around syntax-highlighted listings.

    Fixes in this revision:
      * get_server_settings() now closes the YAML file stream (it leaked).
      * STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST is rebound per instance
        instead of mutating the shared class-level list, so repeated loads
        (e.g. one per unit test) no longer accumulate duplicate entries.
    """
    # Logging control.
    LOGFILENAME = ""
    VERBOSE = False   # Verbose mode. Prints out everything.
    CLEAN = False     # Clean the local website only.
    UNITTEST = False  # Run a unit test of a function.
    MATHJAX = False   # Process and upload MathJax files to server.

    # When diving into the MathJax directory, web walking the deep directories
    # may exceed Python's default recursion limit of 1000.
    # NOTE: this runs once, at class-definition time, as a process-wide side effect.
    RECURSION_DEPTH = 5000
    sys.setrecursionlimit(RECURSION_DEPTH)

    # Fields in the file information (file_info) structure.
    # For example, file_info =
    # [ '/WebDesign/EquationImages/equation001.png', -- The file name.
    #   1,                                           -- Enum type: Is it a file? dir? on local? on remote? on both?
    #   datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a datetime class.
    #   4675]                                        -- File size in bytes.
    FILE_NAME = 0
    FILE_TYPE = 1
    FILE_DATE_TIME = 2
    FILE_SIZE = 3

    # Server settings. Filled in from the private YAML settings file.
    SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
    SERVER_NAME = None
    USER_NAME = None
    PASSWORD_NAME = None
    FTP_ROOT_NAME = None
    FILE_SIZE_LIMIT_NAME = None

    # Map month names onto numbers.
    monthToNumber = {
        'Jan': 1,
        'Feb': 2,
        'Mar': 3,
        'Apr': 4,
        'May': 5,
        'Jun': 6,
        'Jul': 7,
        'Aug': 8,
        'Sep': 9,
        'Oct': 10,
        'Nov': 11,
        'Dec': 12}

    # List of directories to skip over when processing or uploading the web page.
    # Some are private but most are dirs of temporary files.
    # They will be listed as WARNING in the log.
    # Examples:
    #     My private admin settings directory.
    #     Git or SVN local admin directories.
    #     Compile build directories from XCode.
    #     PyCharm build directories.
    #     Python cache directories.
    #     Jupyter checkpoint directories.
    #     XCode temporary file crap.
    DIR_TO_SKIP = "private|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|__pycache__|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"

    # List of files to skip when processing or uploading to the web page.
    # They will be listed as WARNING in the log.
    # Examples:
    #     MathJax yml file.
    #     .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
    FILE_TO_SKIP = ".travis.yml|.svnignore|.htaccess"

    # File extension for text files.
    TEXT_FILE_EXT = ".txt"

    # Suffixes for temporary files which will be deleted during the cleanup
    # phase.
    TEMP_FILE_SUFFIXES = r""" # Use Python raw strings.
        \.                    # Match the dot in the file name.
                              # Now begin matching the file name suffix.
                              # (?: non-capturing match for the regex inside the parentheses,
                              # i.e. matching string cannot be retrieved later.
                              # Now match any of the following file extensions:
        (?: o | obj | lib | exe |    # Object files generated by C, C++, etc compilers
            pyc |                    # Object file generated by the Python compiler
            ilk | pdb | sup |        # Temp files from VC++ compiler
            idb | ncb | opt | plg |  # Temp files from VC++ compiler
            sbr | bsc | map | bce |  # Temp files from VC++ compiler
            res | aps | dep | db |   # Temp files from VC++ compiler
            jbf |                    # Paintshop Pro
            class | jar |            # Java compiler
            log |                    # WS_FTP
            fas |                    # CLISP compiler
            swp | swo |              # Vim editor
            aux |                    # TeX auxilliary files.
            DS_Store | _\.DS_Store | # macOS finder folder settings.
            _\.Trashes |             # macOS recycle bin
            gdb_history)             # GDB history
        $                     # Now we should see only the end of line.
        """

    # Special case: Vim temporary files contain a twiddle anywhere in the
    # name.
    VIM_TEMP_FILE_EXT = "~"

    # Suffixes for temporary directories which should be deleted during the
    # cleanup phase.
    TEMP_DIR_SUFFIX = r"""    # Use Python raw strings.
        (?: Debug | Release | # C++ compiler
            ipch | \.vs |     # Temp directories from VC++ compiler
            \.Trashes | \.Trash) # macOS recycle bin
        $
        """

    # File extension for an internally created temporary file.
    TEMP_FILE_EXT = ".new"

    # Identify source file types.
    HYPERTEXT_FILE_PATTERN = r"""  # Use Python raw strings.
        (\.                  # Match the filename suffix after the .
            (?: html | htm | # HTML hypertext
                css)         # CSS style sheet
        $)                   # End of line.
        """

    SOURCE_FILE_PATTERN = r"""     # Use Python raw strings.
        (?: makefile$ |      # Any file called makefile is a source file.
                             # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
            .vimrc$ |        # Vim script
            (.bashrc$ |      # Bash configuration files.
             .bash_profile$ |
             .bash_logout$)
            |
            (.gitignore$ |   # Git configuration files.
             .gitignore_global$ |
             .gitconfig$)
            |
            (\.              # Match the filename suffix after the .
                             # Now match any of these suffixes:
                (?:
                    c | cpp | h | hpp |  # C++ and C
                    js |                 # Javascript
                    py |                 # Python
                    lsp |                # LISP
                    m |                  # MATLAB
                    FOR | for | f |      # FORTRAN
                    yaml |               # YAML = JSON superset
                    tex |                # LaTeX
                    txt | dat |          # Data files
                    sh)                  # Bash
            $)               # End of line.
        )
        """

    # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
    SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
        (?: ^life\.html$ |          # We want a listing of this particular HTML file.
            ^index\.html$ |         # I want to list my top level HTML file. (There is only one file with this name at the top level web directory.)
            ^webPageDesign\.html$ | # and also this HTML example file, but no others.
            ^StyleSheet\.css$ )     # I want to list my style sheet.
        """

    # Files for which we want to generate a syntax highlighted source code listing.
    # Uses an f-string combined with a raw-string.
    FILE_TO_HIGHLIGHT_PATTERN = fr"""
        (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} |
            {SOURCE_FILE_PATTERN} )
        """

    # Update my email address.
    # This is tricky: Prevent matching and updating the name within in this
    # Python source file by using the character class brackets.
    OLD_EMAIL_ADDRESS = r"""
        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
        """
    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"

    # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.
    # Read patterns and strings from the updateweb.yaml file.
    STRING_REPLACEMENT_LIST = []
    # Pairs of test strings and their correct match/replacements.
    STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []

    # Change all old software version lines of the form
    #     Primpoly Version nnnn.nnnn
    # to the new software version.
    # Note that since we are using raw strings leading and trailing whitespace
    # is ignored in both pattern and replacement.
    CURRENT_SOFTWARE_VERSION = r"""
        Primpoly
        \s+
        Version
        \s+
        ([0-9]+)  # The two part version number NNN.nnn
        \.
        ([0-9]+)
        """
    NEW_SOFTWARE_VERSION = r"""
        Primpoly Version 16.3
        """

    # Match a copyright line. Then extract the copyright symbol which can be
    # ascii (C) or HTML © and extract the old year.
    TWO_DIGIT_YEAR_FORMAT = "%02d"
    COPYRIGHT_LINE = r"""
        Copyright               # Copyright.
        \s+                     # One or more spaces.
        (?P<symbol> \(C\) | ©)  # Match and extract the copyright symbol.
        \D+                     # Any non-digits.
        (?P<old_year>[0-9]+)    # Match and extract the old copyright year,
                                # then place it into variable 'old_year'
        -                       # to
        ([0-9]+)                # New copyright year.
        """

    # Match a line containing the words,
    #     last updated YY
    # and extract the two digit year YY.
    LAST_UPDATED_LINE = r"""
        last\s+          # Match the words "last updated"
        updated\s+
        \d+              # Day number
        \s+              # One or more blanks or tab(
        [A-Za-z]+        # Month
        \s+              # One or more blanks or tabs
        (?P<year>\d+)    # Two digit year. Place it into the variable 'year'
        """

    # Web server root directory.
    DEFAULT_ROOT_DIR = "/"

    # The ftp listing occasionally shows a date newer than the actual date.
    # On my server, it could be 6 months newer when we are near New Year's Day. Typically the server file time is only a 1 or 2 minutes newer.
    # But if the remote file time is much newer, it might be an old file with a bad date/time.
    # Upload the file to be safe.
    # How to see the time differences from the log if they are large:
    #     egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
    #     Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
    # How to see the time differences from the log if they are small and we wait and NOT upload:
    #     egrep -o "Remote file.*is newer.*days" logRemote.txt
    #     Remote file error404.html is newer by 102.0 seconds = 1.7 minutes = 0.0 hours = 0.0 days
    #     Remote file index.html is newer by 113.0 seconds = 1.9 minutes = 0.0 hours = 0.0 days
    MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0
    DAYS_NEWER_FOR_REMOTE_BEFORE_UPLOAD = (1.0 / 24.0) * (1.0 / 60.0) * MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD

    # Upload only if we are newer by more than a few minutes. Allows for a little slop in time stamps on server or host.
    MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0
    DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD = (1.0 / 24.0) * (1.0 / 60.0) * MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD

    # An ftp list command line should be at least this many chars, or we'll
    # suspect an error.
    MIN_FTP_LINE_LENGTH = 7

    # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
    # ftp listings are generally similar to UNIX ls -l listings.
    #
    # Some examples:
    #
    # (1) Freeservers ftp listing,
    #
    #     0          1   2     3     4     5   6   7     8
    #     drwxr-xr-x 3   1000  1000  4096  Nov 18  2006  Electronics
    #     -rw-r--r-- 1   1000  1000  21984 Jun 4   03:46 StyleSheet.css
    #     -rw-r--r-- 1   1000  1000  2901  Sep 26  17:12 allclasses-frame.html
    #
    # (2) atspace ftp listing,
    #
    #     drwxr-xr-x 3 seanerikoconnor vusers 49 Apr 7 2006 Electronics
    #     -rw-r--r-- 1 seanerikoconnor vusers 21984 Jun 4 04:03 StyleSheet.css
    #
    FTP_LISTING = r"""
        [drwx-]+         # Unix type file mode.
        \s+              # One or more blanks or tabs.
        \d+              # Number of links.
        \s+
        \w+              # Owner.
        \s+
        \w+              # Group.
        \s+
        (?P<bytes> \d+)  # File size in bytes, placed into the variable 'bytes'.
        \s+
        (?P<mon> \w+)    # Month modified, placed into the variable 'mon'.
        \s+
        (?P<day> \d+)    # Day modified, placed into the variable 'day'.
        \s+
        (
            (?P<hour> \d+)  # Hour modified, placed into the variable 'hour'.
            :
            (?P<min> \d+)   # Minute modified, placed into the variable 'min'.
            |
            (?P<year> \d+)  # If hours and minutes are absent (happens when year is not the current year),
                            # extract the year instead.
        )
        \s+
        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)  # Path and file name containing letters, numbers,
                                                  # and funny characters. We must escape some of
                                                  # these characters with a backslash, \.
        """

    # HTML header up to the style sheet.
    BASIC_HTML_BEGIN = \
        """
        <!DOCTYPE html>
        <html lang="en-US"> <!-- Set language of this page to USA English. -->

        <head>
            <!-- This page uses Unicode characters. -->
            <meta charset="utf-8">

            <!-- Set viewport to actual device width. Any other settings makes the web page initially appear zoomed-in on mobile devices. -->
            <meta name="viewport" content="width=device-width, initial-scale=1">

            <!-- Title appears in the web browser tab for this page. The browser also uses it to bookmark this page. -->
            <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>

            <!-- Search engines will search using words in this description. They will also display title in their search results. -->
            <meta name="description" content="Syntax Colored Source Code Listing">

            <!-- Some content management software uses the author's name. -->
            <meta name="author" content="Sean Erik O'Connor">

            <meta name="copyright" content="Copyright (C) 1986-2024 by Sean Erik O'Connor. All Rights Reserved.">

            <!-- Begin style sheet insertion -->
            <style>
                /* Default settings for all my main web pages. */
                body
                {
                    /* A wide sans-serif font is more readable on the web. */
                    font-family: Verdana, Geneva, "Trebuchet MS", sans-serif ;

                    /* Set the body font size a little smaller than the user's default browser setting. */
                    font-size: 0.8em ;

                    /* Black text is easier to read. */
                    color: black ;

                    /* More vertical space between lines for more pleasant reading. Use a unitless font height multiplier.
                       Length and percentage percentage values can give scrunched text due to poor inheritance behavior. */
                    line-height: 1.7 ;
                }

                <!-- Now prepare to add the syntax coloring style sheet from Pygment -->
        """

    # After the style sheet and up to the start of the article in the body.
    BASIC_HTML_MIDDLE = \
        """
            </style>
        </head>

        <body>
            <article class="content">
        """

    # After the source code listing, finish the article, body and html document.
    BASIC_HTML_END = \
        """
            </article>
        </body>

        </html>
        """

    def __init__(self):
        """Set up the user settings.

        Determines the local web root, loads the private server settings from
        the YAML parameter file, then precompiles the regular expressions.
        Raises UpdateWebException if the settings file is missing or malformed.
        """

        self.local_root_dir = ""

        # Import the user settings from the parameter file.
        self.get_local_root_dir()
        self.get_server_settings()

        self.precompile_regular_expressions()

    def get_server_settings(self):
        """
        Read web account private settings from a secret offline parameter file.
        These also hold patterns to match and replace in all of our source pages.
        Raises UpdateWebException on any read or format error.
        """

        # Private file which contains my account settings.
        settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
        # Recommended by
        # https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
        try:
            stream = open(settings_file_name, "r")
        except OSError as detail:
            logging.error(f"Cannot open the YAML file {settings_file_name:s}. Unable to read the settings because: {str(detail):s}")
            # Rethrow the exception higher.
            raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ") from detail
        # Read all the YAML documents in the file, then close the stream.
        # (Fix: the previous version leaked the open file handle.)
        with stream:
            yaml_document_list: list[Any] = list(yaml.load_all(stream, Loader))
        num_yaml_docs = len(yaml_document_list)
        if num_yaml_docs != 2:
            logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file. Aborting...")
            raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ")

        # Load all the server settings from document #0 in the YAML file.
        self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
        self.USER_NAME = yaml_document_list[0]['ftp_user_name']
        self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
        self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
        self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])

        # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
        self.STRING_REPLACEMENT_LIST = []
        pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
        for pat_rep in pat_rep_yaml_list:
            # Fetch the regular expression and compile it for speed.
            verbose_regex = pat_rep['pattern']
            pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
            # Since we use raw strings, we need to strip off leading and trailing whitespace.
            replacement_string = pat_rep['replacement_string'].strip()
            self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])

        # Load the test and verify strings.
        # Fix: rebind on the instance so repeated loads (e.g. one UserSettings
        # per unit test) don't keep appending to the shared class-level list.
        self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []
        test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
        for test_verify_string in test_verify_strings_list:
            test_string = test_verify_string['test_string'].strip()
            verify_string = test_verify_string['verify_string'].strip()
            self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string, verify_string])

        print(" ...done!", flush=True)
        return

    def get_local_root_dir(self):
        """Get the local website root directory on this platform."""

        # Each platform has a definite directory for the web page.
        local_web_dir_path = "/Desktop/Sean/WebSite"

        if sys.platform.startswith('darwin'):
            self.local_root_dir = str(Path.home()) + local_web_dir_path
        # My Cyperpower PC running Ubuntu Linux.
        elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
            self.local_root_dir = str(Path.home()) + local_web_dir_path
        # NOTE(review): on any other platform (e.g. win32) local_root_dir stays
        # "" and later path joins become relative paths — confirm intended.
        return

    def precompile_regular_expressions(self):
        """For speed precompile the regular expression search patterns.

        Rebinds the class-level verbose-regex strings as compiled pattern
        objects on this instance.
        """
        self.COPYRIGHT_LINE = re.compile(self.COPYRIGHT_LINE, re.VERBOSE | re.IGNORECASE)
        self.CURRENT_SOFTWARE_VERSION = re.compile(self.CURRENT_SOFTWARE_VERSION, re.VERBOSE | re.IGNORECASE)
        self.FTP_LISTING = re.compile(self.FTP_LISTING, re.VERBOSE | re.IGNORECASE)
        self.TEMP_FILE_SUFFIXES = re.compile(self.TEMP_FILE_SUFFIXES, re.VERBOSE | re.IGNORECASE)
        self.TEMP_DIR_SUFFIX = re.compile(self.TEMP_DIR_SUFFIX, re.VERBOSE)
        self.SOURCE_FILE_PATTERN = re.compile(self.SOURCE_FILE_PATTERN, re.VERBOSE)
        self.HYPERTEXT_FILE_PATTERN = re.compile(self.HYPERTEXT_FILE_PATTERN, re.VERBOSE)
        self.OLD_EMAIL_ADDRESS = re.compile(self.OLD_EMAIL_ADDRESS, re.VERBOSE | re.IGNORECASE)
        self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
        self.LAST_UPDATED_LINE = re.compile(self.LAST_UPDATED_LINE, re.VERBOSE | re.IGNORECASE)
595
596# ----------------------------------------------------------------------------
597# Unit test individual functions.
598# ----------------------------------------------------------------------------
599
class UnitTest(unittest.TestCase):
    """Unit tests for the UserSettings regular expressions and file time/date helpers.

    NOTE(review): several tests depend on the local environment — they read the
    private YAML settings file via UserSettings(), reference files under the
    local web root (./Images/home.png), and hard-code 2024 dates in expected
    strings — so they will fail on other machines or in later years; confirm
    before running elsewhere.
    """
    def setUp(self):
        """Create a fresh UserSettings for each test (reads the private YAML parameter file)."""
        self.user_settings = UserSettings()
        # NOTE(review): UserSettings.__init__ already calls get_local_root_dir();
        # this second call is redundant but harmless.
        self.user_settings.get_local_root_dir()

    def tearDown(self):
        """Clean up the UnitTest class."""
        self.user_settings = None

    def test_copyright_updating(self):
        """Test copyright line updating to the current year."""
        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
        line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor. All Rights Reserved. Copyright © 1999-2024 by Sean Erik O'Connor"
        line_after_update_actual = "Copyright (C) 1999-2024 by Sean Erik O'Connor. All Rights Reserved. Copyright © 1999-2024 by Sean Erik O'Connor"
        pat = self.user_settings.COPYRIGHT_LINE
        match = pat.search(line_before_update)

        if match:
            old_year = int(match.group('old_year'))
            # Same as call to self.get_current_year():
            current_year = int(time.gmtime()[0])
            if old_year < current_year:
                # We matched and extracted the old copyright symbol into the variable
                # 'symbol' using the pattern syntax (?P<symbol> \(C\) | ©)
                # We now insert it back by placing the special syntax
                # \g<symbol> into the replacement string.
                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
                    str(current_year)
                line_after_update_computed = pat.sub(new_copyright, line_before_update)
                # NOTE(review): the expected string hard-codes the year 2024,
                # so this assertion starts failing once current_year > 2024.
                self.assertEqual(
                    line_after_update_actual,
                    line_after_update_computed,
                    f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")
            else:
                self.fail()
        else:
            self.fail()

    def test_update_software_version(self):
        """Test updating to a new version of Primpoly."""
        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
        old_version_line = "| Primpoly Version 00." + "0 - A Program for Computing Primitive Polynomials.|"
        new_version_line = "| Primpoly Version 16." + "3 - A Program for Computing Primitive Polynomials.|"
        pat = self.user_settings.CURRENT_SOFTWARE_VERSION
        match = pat.search(old_version_line)
        if match:
            # Note that since we are using raw strings leading and trailing
            # whitespace is ignored.
            new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
            updated_version_line = pat.sub(new_version, old_version_line)
            self.assertEqual(updated_version_line, new_version_line, f"updated version line = {updated_version_line:s} new line = {new_version_line:s}")
        else:
            self.fail()

    def test_extract_filename_from_ftp_listing(self):
        """Test parsing an FTP listing."""
        ftp_line = "-rw-r--r-- 1 1000 1000 2901 Sep 26 17:12 allclasses-frame.html"
        extracted_file_name = "allclasses-frame.html"
        pat = self.user_settings.FTP_LISTING
        match = pat.search(ftp_line)
        if match:
            filename = match.group('filename')
            self.assertEqual(
                filename,
                extracted_file_name,
                f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
        else:
            self.fail()

    def test_get_file_time_and_date(self):
        """Test getting a file time and date.

        NOTE(review): depends on the modification time of a real local file;
        fails if ./Images/home.png is absent or has a different mtime.
        """
        # Point to an old file.
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Get the UTC time.
        file_epoch_time = os.path.getmtime(full_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        # Create a datetime object for the file.
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class; year, month, day, hour, minute, seconds.
        # Check if the file time matches what we would see if we did ls -l <file_name>
        computed = f"file {file_name:s} datetime {d.ctime():s}"
        actual = "file ./Images/home.png datetime Thu Jul 18 16:55:44 2024"
        self.assertEqual(computed, actual)

    def test_set_file_time_and_date(self):
        """Test setting a file time and date."""
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Create a temporary file in the same directory.
        temp_file_name = "temporal.tmp"
        # NOTE(review): no '/' separator here — the temp file lands beside the
        # web root as "<root>temporal.tmp", not inside it; confirm intended.
        full_temp_file_name = self.user_settings.local_root_dir + temp_file_name
        try:
            with open(full_temp_file_name, 'w') as fp:
                fp.write("The End of Eternity")
        except OSError as detail:
            logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s} Aborting...")
            raise UpdateWebException("Failed the unit test for setting time and date of a file. See the log file for details. Aborting...") from detail
        # Get the old file time. Set the temporary file to the same time.
        file_stat = os.stat(full_file_name)
        os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
        # What is the temporary file's time now?
        file_epoch_time = os.path.getmtime(full_temp_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class; year, month, day, hour, minute, seconds.
        # Is the temporary file time set properly?
        computed = f"file {file_name:s} datetime {d.ctime():s}"
        actual = "file ./Images/home.png datetime Thu Jul 18 16:55:44 2024"
        self.assertEqual(computed, actual)
        os.remove(full_temp_file_name)

    def test_difference_of_time_and_date(self):
        """Test a date difference calculation."""
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Get the UTC time.
        file_epoch_time = os.path.getmtime(full_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        # Create a datetime object for the file.
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class; year, month, day, hour, minute, seconds.
        # Slightly change the date and time by adding 1 minute.
        # NOTE(review): adding 1 to the seconds field overflows if the file's
        # mtime lands exactly on second 59 — confirm acceptable for a test.
        d2 = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]+1)  # year, month, day, hour, minute, second
        time_delta = d2 - d
        seconds_different = time_delta.total_seconds()
        minutes_different = seconds_different / 60.0
        hours_different = minutes_different / 60.0
        days_different = hours_different / 24.0
        computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
        actual = "difference 0.00001 days, 0.00028 hours 0.01667 minutes, 1.00000 seconds"
        self.assertEqual(computed, actual)

    def test_pattern_match_dir_to_skip(self):
        """Test if skipping certain named directories is recognizing the dir names."""
        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
        pat = re.compile(self.user_settings.DIR_TO_SKIP)
        if pat.search(dir_skip):
            self.assertTrue(True)
        else:
            self.assertTrue(False)

    def test_file_name_to_syntax_highlight(self):
        """Test if syntax highlighting recognizes file names to highlight."""
        file_name1 = "Computer/hello.lsp"
        file_name2 = "Computer/life.html"
        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
        if p.search(Path(file_name1).name) and p.search(Path(file_name2).name):
            self.assertTrue(True)
        else:
            self.assertTrue(False)

    def test_user_settings(self):
        """Test whether user settings are correctly initialized.

        NOTE(review): asserts the specific file size limit stored in the
        private YAML settings file; changing that file breaks this test.
        """
        computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
        actual = "File size limit = 50000 K"
        self.assertEqual(computed, actual, "File size limit settings are incorrect.")

    def test_check_replace_substring(self,debug=False):
        """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
        For troubleshooting, turn on debug.
        """
        test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
        # Iterate over all test strings.
        for pair in test_verify_pairs:
            [test_string, verify_string] = pair
            if debug:
                print( f"\n>>>>>>> next test/verify string pair = \n\t{pair}")
            # Iterate over all patterns and replacements.
            for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
                [pat, rep_string] = match_replace_tuple
                match = pat.search(test_string)
                # The pattern match succeeds.
                if match:
                    try:
                        sub = pat.sub(rep_string, test_string)
                    # String replacement succeeds for this pattern/replace pair iteration.
                        if debug:
                            print( f">>>>>>> apply match and replace: \n\t{test_string} ---> {sub}")
                        test_string = sub
                    except IndexError as detail:
                        print(f"Caught an exception: {str(detail):s}. Replacement failed.")
                        if debug:
                            self.assertTrue(False)
                elif debug:
                    print( f">>>>>>> match failed for pattern \n\t{pat} \nwhen applied to string \n\t{test_string}")
                # No match, so go on to the next pattern and don't change test_string.
            # Done with all pattern/replace on test string.
            # Check this test string in the list.
            self.assertEqual(test_string, verify_string, f"\ntest_string = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
788
789# ----------------------------------------------------------------------------
790# Command line options.
791# ----------------------------------------------------------------------------
792
class CommandLineSettings(object):
    """Parse the command line options and transfer any flags that were set into the user settings."""

    def __init__(self, user_settings, raw_args=None):
        """Define the options, parse raw_args (or sys.argv when None), and update user_settings."""
        parser = argparse.ArgumentParser(description="updateweb options")

        # -v: log all changes, not just warnings and errors.
        parser.add_argument(
            "-v",
            "--verbose",
            help="Turn on verbose mode to log everything",
            action="store_true")

        # -c: clean up the local copy of the website only; skip the remote update.
        parser.add_argument(
            "-c",
            "--clean",
            help="Do a cleanup on the local web site only.",
            action="store_true")

        # -m: additionally upload the MathJax directory (normally only needed once).
        parser.add_argument(
            "-m",
            "--mathjax",
            help="""ALSO upload mathjax directory.\
            Do this if you have a new version of MathJax or if you've reset your server and deleted the /mathjax remote directory.\
            You'll need to do git restore on any altered files in the local branch and git clean -f to remove any newly created files.\
            NOTE: If you did reset your server and delete all files, run the command find . -name '*.*' -exec touch {} \\; from the web page root directory.\
            Also run find . -name '*' -exec touch {} \\; This will ensure accurate times on the server.""",
            action="store_true")

        # -t: run the unit tests and stop.
        parser.add_argument("-t", "--test",
                            help="Run unit tests.",
                            action="store_true")

        args = parser.parse_args(raw_args)

        # Only flags actually given on the command line override the defaults
        # already present in user_settings; unset flags leave it untouched.
        if args.verbose:
            user_settings.VERBOSE = True
        if args.clean:
            user_settings.CLEAN = True
        if args.test:
            user_settings.UNITTEST = True
        if args.mathjax:
            user_settings.MATHJAX = True
841
842# ----------------------------------------------------------------------------
843# Base class which describes my web site overall.
844# ----------------------------------------------------------------------------
845
class WebSite(object):
    """
    Abstract class used for analyzing both local and remote (ftp server) websites.
    Contains the web-walking functions which traverse the directory structures and files.
    These will be overloaded in the subclasses with differently specialized methods for either walking a disk drive directory with ls commands or else walking a remote directory with FTP commands.
    Child classes may define additional functions which only they need.
    """

    def __init__(self, settings):
        """Set up root directories.

        Arguments:
            settings -- the user settings object holding patterns, indices and defaults.
        """

        # Import the user settings.
        self.user_settings = settings

        # Queue keeps track of directories not yet processed.
        self.queue = []

        # List of all directories traversed.
        self.directories = []

        # List of files traversed, with file information.
        self.files = []

        # Find out the root directory and go there.
        self.root_dir = self.get_root_dir()
        self.go_to_root_dir(self.root_dir)

    @staticmethod
    def get_current_year():
        """Get the current year (UTC)."""
        return int(time.gmtime()[0])

    @staticmethod
    def get_current_two_digit_year():
        """Get the last two digits of the current year."""
        return WebSite.get_current_year() % 100

    @staticmethod
    def is_file_info_type(file_info):
        """Check if we have a file information structure (a list) or merely a simple file name (a str)."""
        try:
            if isinstance(file_info, list):
                return True
            elif isinstance(file_info, str):
                return False
            else:
                logging.error("is_file_info_type found a bad type. Aborting...")
                raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ")
        except TypeError as detail:
            logging.error(f"is_file_info_type found a bad type {str(detail):s}. Aborting...")
            raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ") from detail

    def get_root_dir(self):
        """Subclass: Put code here to get the root directory"""
        return ""

    def go_to_root_dir(self, root_dir):
        """Subclass: Put code here to go to the root directory"""
        pass  # Python's do-nothing statement.

    def one_level_down(self, d):
        """Subclass: Fill in with a method which returns a list of the
        directories and files immediately beneath dir"""
        return [], []

    def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
        """Walk a directory in either depth first or breadth first order. BFS is the default.

        Appends every non-skipped file to self.files and every non-skipped
        directory (relative to the root) to self.directories, recursing into
        each subdirectory via the queue.
        """

        # Get all subfiles and subdirectories off this node.
        subdirectories, subfiles = self.one_level_down(d)

        # Compile the skip patterns once; they are loop-invariant, so don't
        # recompile them for every file and directory below.
        file_skip_pat = re.compile(self.user_settings.FILE_TO_SKIP)
        dir_skip_pat = re.compile(self.user_settings.DIR_TO_SKIP)

        # Add all the subfiles in order.
        for f in subfiles:

            name = self.strip_root(f)
            logging.debug(f"Webwalking: Adding file {name[self.user_settings.FILE_NAME]:s} to list.")

            # Some files are private so skip them from consideration.
            if file_skip_pat.search(name[self.user_settings.FILE_NAME]):
                logging.warning(
                    f"Webwalking: Skipping private file {name[self.user_settings.FILE_NAME]:s}")
            # Don't upload the log file due to file locking problems.
            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
                logging.debug(f"Webwalking: Skipping log file {name[self.user_settings.FILE_NAME]:s}")
            # File size limit on some servers.
            else:
                self.files.append(name)

        # Queue up the subdirectories.
        # (Renamed loop variable so it no longer shadows the parameter d.)
        for subdir in subdirectories:
            # Some directories are private such as .git or just temporary file
            # caches so skip them from consideration.
            if dir_skip_pat.search(subdir):
                logging.warning(f"Webwalking: Skipping private dir {subdir:s}")
            else:
                logging.debug(f"Webwalking: Pushing dir {subdir:s} on the queue.")
                self.queue.append(subdir)

        # Search through the directories.
        while len(self.queue) > 0:
            # For breadth first search, remove from beginning of queue.
            if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
                d = self.queue.pop(0)

            # For depth first search, remove from end of queue.
            elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
                d = self.queue.pop()
            else:
                d = self.queue.pop(0)

            name = self.strip_root(d)
            logging.debug(f"Webwalking: Adding relative directory {name:s} to list, full path = {d:s}.")
            self.directories.append(name)

            self.walk(d)

    def strip_root(self, file_info):
        """Return a path, but strip off the root directory.

        Accepts either a plain file name or a file information list; the
        same type is returned.
        """

        root = self.root_dir

        # Extract the file name.
        if self.is_file_info_type(file_info):
            name = file_info[self.user_settings.FILE_NAME]
        else:
            name = file_info

        # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
        # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
        # Art/foo.txt
        lenroot = len(root)
        if root == self.user_settings.DEFAULT_ROOT_DIR:
            pass
        else:
            # Also skip over the '/' which separates the root from the rest.
            lenroot = lenroot + 1

        stripped_path = name[lenroot:]

        if self.is_file_info_type(file_info):
            # Update the file name only; keep type, date/time and size.
            return [stripped_path,
                    file_info[self.user_settings.FILE_TYPE],
                    file_info[self.user_settings.FILE_DATE_TIME],
                    file_info[self.user_settings.FILE_SIZE]]
        else:
            return stripped_path

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a path."""

        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def scan(self):
        """Scan the directory tree recursively from the root."""
        logging.debug(f"Webwalking: Beginning recursive directory scan from root directory {self.root_dir:s}")
        self.walk(self.root_dir)

    def modtime(self, f):
        """Subclass: Get file modification time"""
        pass

    def finish(self):
        """Quit web site"""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
        pass
1019
1020# ----------------------------------------------------------------------------
1021# Subclass which knows about the local web site on disk.
1022# ----------------------------------------------------------------------------
1023
1024class LocalWebSite(WebSite):
1025 """Walk the local web directory on local disk down from the root.
1026 Clean up temporary files and do other cleanup work."""
1027
1028 def __init__(self, settings):
1029 """Go to web page root and list all files and directories."""
1030
1031 # Initialize the parent class.
1032 WebSite.__init__(self, settings)
1033
1034 self.root_dir = self.get_root_dir()
1035 logging.debug(f"LocalWebSite.__init__(): \tRoot directory: {self.root_dir:s}")
1036
1037 def get_root_dir(self):
1038 """Get the name of the root directory"""
1039 return self.user_settings.local_root_dir
1040
1041 def go_to_root_dir(self, root_dir):
1042 """Go to the root directory"""
1043
1044 # Go to the root directory.
1045 logging.debug(f"LocalWebSite.go_to_root_dir(): \tchdir to root directory: {root_dir:s}")
1046 os.chdir(root_dir)
1047
1048 # Read it back.
1049 self.root_dir = os.getcwd()
1050 logging.debug(f"LocalWebSite.go_to_root_dir(): \tgetcwd root directory: {self.root_dir:s}")
1051
1052 def one_level_down(self, d):
1053 """List all files and subdirectories in the current directory, dir. For files, collect file info
1054 such as time, date and size."""
1055
1056 directories = []
1057 files = []
1058
1059 # Change to current directory.
1060 os.chdir(d)
1061
1062 # List all subdirectories and files.
1063 dir_list = os.listdir(d)
1064
1065 if dir_list:
1066 for line in dir_list:
1067 # Add the full path prefix from the root.
1068 name = self.append_root_dir(d, line)
1069 logging.debug(f"LocalWebSite.one_level_down(): \tlocal dir or file {name:s}")
1070
1071 # Is it a directory or a file?
1072 if os.path.isdir(name):
1073 directories.append(name)
1074 elif os.path.isfile(name):
1075 # First assemble the file information of name, time/date and size into a list.
1076 # Can index it like an array. For example,
1077 # file_info =
1078 # [ '/WebDesign/EquationImages/equation001.png', -- The file name.
1079 # 1, -- Enum type FileType.FILE = 1.
1080 # datetime.datetime(2010, 2, 3, 17, 15), -- UTC encoded in a date/time class.
1081 # 4675] -- File size in bytes.
1082 file_info = [name,
1083 FileType.FILE,
1084 self.get_file_date_time(name),
1085 self.get_file_size(name)]
1086 files.append(file_info)
1087
1088 # Sort the names into order.
1089 if directories:
1090 directories.sort()
1091 if files:
1092 files.sort()
1093
1094 return directories, files
1095
1096 @staticmethod
1097 def get_file_date_time(file_name):
1098 """Get a local file time and date in UTC."""
1099
1100 file_epoch_time = os.path.getmtime(file_name)
1101 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1102 # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1103 d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1104 return d
1105
1106 @staticmethod
1107 def get_file_size(file_name):
1108 """Get file size in bytes."""
1109 return os.path.getsize(file_name)
1110
1111 def copy_to_text_file(self, file_name):
1112 """Make a copy of a file with a .txt extension"""
1113
1114 # Remove the old copy with the text file extension.
1115 copy_file_name = file_name + self.user_settings.TEXT_FILE_EXT
1116 try:
1117 os.remove(copy_file_name)
1118 except OSError as detail:
1119 logging.error(f"Cannot remove old text file copy {copy_file_name:s}: {str(detail):s}")
1120
1121 # Create the new copy, which is an exact duplicate.
1122 self.process_lines_of_file(file_name, copy_file_name)
1123
1124 # Make the new copy have the same modification and access time and date as the original
1125 # since it is just an exact copy.
1126 # That way we won't upload copies with newer times constantly, just because they look as
1127 # though they've been recently modified.
1128 file_stat = os.stat(file_name)
1129 os.utime(copy_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1130 logging.debug(f"Reset file time to original time for copy {copy_file_name:s}")
1131
1132 @staticmethod
1133 def clean_up_temp_file(temp_file_name, file_name, changed):
1134 """Remove the original file, rename the temporary file name to the original name.
1135 If there are no changes, just remove the temporary file.
1136 """
1137
1138 if changed:
1139 # Remove the old file now that we have the rewritten file.
1140 try:
1141 os.remove(file_name)
1142 logging.debug(f"Changes were made. Removed original file {file_name:s}")
1143 except OSError as detail:
1144 logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}. Need to remove it manually.")
1145
1146 # Rename the new file to the old file name.
1147 try:
1148 os.rename(temp_file_name, file_name)
1149 logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1150 except OSError as detail:
1151 logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}. Need to rename manually")
1152 else:
1153 # No changes? Remove the temporary file.
1154 try:
1155 os.remove(temp_file_name)
1156 logging.debug(f"No changes were made. Removed temporary file {temp_file_name:s}")
1157 except OSError as detail:
1158 logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}. Need to remove it manually.")
1159 return
1160
1161 @staticmethod
1162 def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1163 """
1164 Process each line of a file with a list of functions. Create a new temporary file.
1165
1166 The default list is None which means make an exact copy.
1167 """
1168
1169 # Assume no changes.
1170 changed = False
1171
1172 # Open both input and output files for processing. Check if we cannot do it.
1173 fin = None
1174 try:
1175 fin = open(in_file_name, "r")
1176 except IOError as detail:
1177 logging.error(f"process_lines_of_file(): \tCannot open file {in_file_name:s} for reading: {str(detail):s} Aborting...")
1178 if fin is not None:
1179 fin.close()
1180 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1181 fout = None
1182 try:
1183 fout = open(out_file_name, "w")
1184 except IOError as detail:
1185 logging.error(f"process_lines_of_file(): \tCannot open file {out_file_name:s} for writing: {str(detail):s} Aborting...")
1186 if fout is not None:
1187 fout.close()
1188 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1189
1190 # Read each line of the file, aborting if there is a read error.
1191 try:
1192 line = fin.readline()
1193
1194 # Rewrite the next line of the file using all the rewrite functions.
1195 while line:
1196 original_line = line
1197 # If we have one or more rewrite functions...
1198 if process_line_function_list is not None:
1199 # ...apply each rewrite functions to the line, one after the other in order.
1200 for processLineFunction in process_line_function_list:
1201 if processLineFunction is not None:
1202 line = processLineFunction(line)
1203
1204 if original_line != line:
1205 logging.debug("Rewrote the line" + ":" + f"|{original_line:s}|" + "into" + ":" + f"|{line:s}| for file" + ":" + f"{in_file_name:s}")
1206 changed = True
1207
1208 fout.write(line)
1209
1210 line = fin.readline()
1211
1212 fin.close()
1213 fout.close()
1214 except IOError as detail:
1215 logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s} Aborting...")
1216 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1217
1218 if changed:
1219 logging.debug(f"process_lines_of_file(): \tRewrote original file {in_file_name:s}."
1220 f"Changes are in temporary copy {out_file_name:s}")
1221
1222 # Return True if any lines were changed.
1223 return changed
1224
1225 def clean(self):
1226 """Scan through all directories and files in the local on disk website and clean them up."""
1227
1228 num_source_files_changed = 0
1229 num_source_files_syntax_highlighted = 0
1230
1231 logging.debug("Cleaning up the local web page.")
1232
1233 if self.directories is None or self.files is None:
1234 logging.error("Web site has no directories or files. Aborting...")
1235 raise UpdateWebException("Internal error for cleaning up the local web site. See the log file for details. Aborting... ")
1236
1237 for d in self.directories:
1238
1239 if self.is_temp_dir(d):
1240 # Add the full path prefix from the root.
1241 name = self.append_root_dir(self.get_root_dir(), d)
1242 try:
1243 logging.debug(f"Removing temp dir {self.root_dir:s} recursively")
1244 shutil.rmtree(name)
1245 except OSError as detail:
1246 logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1247
1248 for f in self.files:
1249 # Add the full path prefix from the root.
1250 full_file_name = self.append_root_dir(
1251 self.get_root_dir(), f[self.user_settings.FILE_NAME])
1252
1253 # Remove all temporary files.
1254 if self.is_temp_file(f):
1255 try:
1256 logging.debug(f"Removing temp file {full_file_name:s}")
1257 os.remove(full_file_name)
1258 except OSError as detail:
1259 logging.error(f"Cannot remove temp dir {full_file_name:s}: {str(detail):s}")
1260
1261 # Update source code files.
1262 if self.is_source_or_hypertext_file(f):
1263 changed = self.rewrite_source_file(full_file_name)
1264 if changed:
1265 num_source_files_changed += 1
1266 logging.debug(f"Rewrote source code file {self.root_dir:s}")
1267
1268 # Generate a syntax highlighted code listing.
1269 # Make it the same time and date as the original code. Then, only if there are recent changes, we will update the remote server.
1270 if self.is_file_to_syntax_highlight(f):
1271 # full_file_name_highlighted = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1272 full_file_name_highlighted = self.create_syntax_highlighted_code_listing(full_file_name)
1273 if full_file_name_highlighted is not None:
1274 logging.debug(f"Generated a syntax highlighted source listing file {full_file_name_highlighted:s} for the file {full_file_name:s}")
1275 else:
1276 logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1277 num_source_files_syntax_highlighted += 1
1278
1279 logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1280 logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1281
1282 def is_temp_file(self, file_info):
1283 """Identify a file name as a temporary file"""
1284
1285 file_name = file_info[self.user_settings.FILE_NAME]
1286
1287 # Suffixes and names for temporary files be deleted.
1288 pat = self.user_settings.TEMP_FILE_SUFFIXES
1289 match = pat.search(file_name)
1290 # Remove any files containing twiddles anywhere in the name.
1291 if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1292 return True
1293
1294 return False
1295
1296 def is_temp_dir(self, dir_name):
1297 """Identify a name as a temporary directory."""
1298
1299 p = self.user_settings.TEMP_DIR_SUFFIX
1300 return p.search(dir_name)
1301
1302 def is_source_or_hypertext_file(self, file_info):
1303 """ Check if the file name is a source file or a hypertext file."""
1304
1305 file_name = file_info[self.user_settings.FILE_NAME]
1306 p1 = self.user_settings.SOURCE_FILE_PATTERN
1307 p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1308 if p1.search(file_name) or p2.search(file_name):
1309 return True
1310 else:
1311 return False
1312
1313 def is_file_to_syntax_highlight(self, file_info):
1314 """Check if this file type should have a syntax highlighted source listing."""
1315
1316 # Take apart the file name.
1317 full_file_name = file_info[self.user_settings.FILE_NAME]
1318 file_name = Path(full_file_name).name
1319
1320 p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1321 if p.search(file_name):
1322 return True
1323 else:
1324 return False
1325
1326 def rewrite_substring(self, line):
1327 """Rewrite a line containing a pattern of your choice"""
1328
1329 # Start with the original unchanged line.
1330 rewritten_line = line
1331
1332 # Do the replacements in order from first to last.
1333 for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1334 # Get the next pattern match replacement string tuple.
1335 [pat, rep_string] = match_replace_tuple
1336 # Does it match? Then do string substitution, else leave the line unchanged.
1337 match = pat.search(rewritten_line)
1338 if match:
1339 # Now we have these cases:
1340 # -No capture variables at all, but just a straightforward pattern match followed by a string substitution.
1341 # -One or more capture variable names in the pattern (?P<varname> ... ) along with the same corresponding match group names in replacement string \\g<varname> ...
1342 # If pat.sub() finds any inconsistency here such as the capture variable names not matching the group names, it will throw an exception.
1343 try:
1344 sub = pat.sub(rep_string, rewritten_line)
1345 rewritten_line = sub
1346 except IndexError as detail:
1347 logging.error(f"ERROR: {str(detail):s}. Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml. Did not rewrite the line |{rewritten_line:s}|")
1348
1349 return rewritten_line
1350
1351 def rewrite_email_address_line(self, line):
1352 """Rewrite lines containing old email addresses."""
1353
1354 # Search for the old email address.
1355 pat = self.user_settings.OLD_EMAIL_ADDRESS
1356 match = pat.search(line)
1357
1358 # Replace the old address with my new email address.
1359 if match:
1360 new_address = self.user_settings.NEW_EMAIL_ADDRESS
1361 sub = pat.sub(new_address, line)
1362 line = sub
1363
1364 return line
1365
1366 def rewrite_version_line(self, line):
1367 """Rewrite lines containing the current version of software."""
1368
1369 # Search for the current version.
1370 pat = self.user_settings.CURRENT_SOFTWARE_VERSION
1371 match = pat.search(line)
1372
1373 # Replace with the new version.
1374 if match:
1375 # Note that since we are using raw strings leading and trailing
1376 # whitespace is ignored.
1377 new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
1378 sub = pat.sub(new_version, line)
1379 line = sub
1380
1381 return line
1382
1383 def rewrite_copyright_line(self, line):
1384 """Rewrite copyright lines if they are out of date."""
1385
1386 # Match the lines,
1387 # Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1388 # Copyright © nnnn-mmmm by Sean Erik O'Connor.
1389 # and pull out the old year and save it.
1390 pat = self.user_settings.COPYRIGHT_LINE
1391 match = pat.search(line)
1392
1393 # Found a match.
1394 if match:
1395 old_year = int(match.group('old_year'))
1396
1397 # Replace the old year with the current year.
1398 # We matched and extracted the old copyright symbol into the variable
1399 # 'symbol' using the pattern syntax (?P<symbol> \(C\) | ©)
1400 # We now insert it back by placing the special syntax \g<symbol>
1401 # into the replacement string.
1402 if old_year < WebSite.get_current_year():
1403 new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
1404 str(WebSite.get_current_year())
1405 sub = pat.sub(new_copyright, line)
1406 line = sub
1407 return line
1408
1409 def rewrite_last_update_line(self, line):
1410 """Rewrite the Last Updated line if the year is out of date."""
1411
1412 # Match the last updated line and pull out the year.
1413 # last updated 01 Jan 24.
1414 p = self.user_settings.LAST_UPDATED_LINE
1415 m = p.search(line)
1416
1417 if m:
1418 last_update_year = int(m.group('year'))
1419
1420 # Convert to four digit years.
1421 if last_update_year > 90:
1422 last_update_year += 1900
1423 else:
1424 last_update_year += 2000
1425
1426 # If the year is old, rewrite to "01 Jan <current year>".
1427 if last_update_year < WebSite.get_current_year():
1428 two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1429 sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1430 line = sub
1431
1432 return line
1433
1434 def rewrite_source_file(self, file_name):
1435 """Rewrite copyright lines, last updated lines, etc."""
1436 changed = False
1437
1438 # Create a new temporary file name for the rewritten file.
1439 temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1440
1441 # Apply changes to all lines of the temporary file. Apply change functions in
1442 # the sequence listed.
1443 if self.process_lines_of_file(file_name, temp_file_name,
1444 [self.rewrite_copyright_line,
1445 self.rewrite_last_update_line,
1446 self.rewrite_email_address_line,
1447 self.rewrite_substring,
1448 self.rewrite_version_line]):
1449 logging.debug(f"Changed (rewritten) source file {file_name:s}")
1450 changed = True
1451
1452 # Rename the temporary file to the original file name. If no changes, just delete the temp file.
1453 self.clean_up_temp_file(temp_file_name, file_name, changed)
1454
1455 return changed
1456
1457 @staticmethod
1458 def create_syntax_highlighted_code_listing(full_file_name, **kwargs):
1459 """Create a syntax highlighted source listing for the file and return its name. Return None if there is an error.
1460 Keep the same date/time as the original file."""
1461
1462 # kwargs is a dictionary for key, value in kwargs.items():
1463 # for key, value in kwargs.items():
1464 # if key in kwargs:
1465 # print( f"kwargs:" )
1466 # print( f" key = |{key}|")
1467 # print( f" value = |{value}|" )
1468 dry_run_value = kwargs.get('dry_run')
1469 dry_run = False
1470 if dry_run_value is not None and dry_run_value is True:
1471 dry_run = True
1472
1473 # Take apart the file name.
1474 file_name_without_extension = Path(full_file_name).stem
1475 file_extension = Path(full_file_name).suffix
1476
1477 # Append *.html to the source code file name. This will be the syntax highlighted code listing.
1478 full_file_name_highlighted = f"{full_file_name}.html"
1479
1480 # First choose the language lexer from the file name itself if there's no extension.
1481 # Dotted file names are treated as the entire file name.
1482 match file_name_without_extension:
1483 case "makefile":
1484 lexer = MakefileLexer()
1485 case ".bash_profile"|".bashrc"|".bash_logout":
1486 lexer = BashLexer()
1487 case ".vimrc":
1488 lexer = VimLexer()
1489 case ".gitignore_global" | ".gitignore" | ".gitconfig":
1490 lexer = OutputLexer() # No formatting.
1491 case _:
1492 # Choose the language lexer from the file extension. Web stuff first, then programming languages.
1493 match file_extension:
1494 case ".html":
1495 lexer = HtmlLexer()
1496 case ".css":
1497 lexer = CssLexer()
1498 case ".js":
1499 lexer = JavascriptLexer()
1500 case ".sh":
1501 lexer = BashLexer()
1502 case ".py":
1503 lexer = PythonLexer()
1504 case ".c" | ".h":
1505 lexer = CLexer()
1506 case ".hpp" | ".cpp":
1507 lexer = CppLexer()
1508 case ".lsp":
1509 lexer = CommonLispLexer()
1510 case ".for" | ".FOR" | ".f":
1511 lexer = FortranFixedLexer() # Fixed format FORTRAN, not FORTRAN 90.
1512 case ".txt" | ".dat": # Generic data file; no formatting.
1513 lexer = OutputLexer()
1514 case ".tex":
1515 lexer = TexLexer() # LaTeX, TeX, or related files.
1516 case ".m":
1517 lexer = MatlabLexer()
1518 case ".yaml":
1519 lexer = YamlLexer()
1520 case _:
1521 logging.error(f"Can't find a lexer for file {full_file_name}. Cannot generate a syntax highlighted source listing. Aborting...")
1522 return None
1523
1524 # Read the source code file into a single string.
1525 try:
1526 with open(full_file_name, 'r') as fp:
1527 source_file_string = fp.read()
1528 except OSError as detail:
1529 logging.error(f"Cannot read the source code file {full_file_name:s} for syntax highlighting: {str(detail):s} Aborting...")
1530
1531 # Top level Pygments function generates the HTML for the highlighted code.
1532 highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1533
1534 # The style sheet is always the same for all languages.
1535 style_sheet = HtmlFormatter().get_style_defs('.highlight')
1536
1537 # Write out the syntax colored file.
1538 if dry_run:
1539 logging.debug(f"Dry run only: don't generate the syntax highlighted file {full_file_name_highlighted:s}")
1540 return None
1541 else:
1542 try:
1543 # Write out the highlighted code listing in HTML with CSS style sheet attached.
1544 with open(full_file_name_highlighted, 'w') as fp:
1545 fp.write(UserSettings.BASIC_HTML_BEGIN)
1546 fp.write(style_sheet)
1547 fp.write(UserSettings.BASIC_HTML_MIDDLE)
1548 fp.write(highlighted_html_source_file_string)
1549 fp.write(UserSettings.BASIC_HTML_END)
1550 except OSError as detail:
1551 logging.error(f"Cannot write the syntax highlighted file {full_file_name_highlighted:s}: {str(detail):s} Aborting...")
1552
1553 # Set the listing file to the same modification and access time and date as the source file.
1554 file_stat = os.stat(full_file_name)
1555 os.utime(full_file_name_highlighted, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1556
1557 # What is the listing file time now?
1558 file_epoch_time = os.path.getmtime(full_file_name_highlighted)
1559 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1560 d_list = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]) # datetime class; year, month, day, hour, minute, seconds.
1561
1562 # Source file and listing should be the same time.
1563 file_epoch_time = os.path.getmtime(full_file_name)
1564 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1565 d_source = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]) # datetime class; year, month, day, hour, minute, seconds.
1566 logging.debug(f"Generated a syntax highlighted listing {full_file_name_highlighted:s} with same time as source file {full_file_name:s}.")
1567 logging.debug(f"\tsource file time {d_source.ctime():s}")
1568 logging.debug(f"\tlisting file time {d_list.ctime():s}")
1569 return full_file_name_highlighted
1570
1571# ----------------------------------------------------------------------------
1572# Subclass which knows about the remote web site.
1573# ----------------------------------------------------------------------------
1574
1575class RemoteWebSite(WebSite):
1576 """Walk the remote web directory on a web server down from the root.
1577 Use FTP commands:
1578 https://en.wikipedia.org/wiki/List_of_FTP_commands
1579 Use the Python ftp library:
1580 https://docs.python.org/3/library/ftplib.html
1581 """
1582
    def __init__(self, settings, server, user, password, ftproot):
        """Connect to FTP server and list all files and directories.

        Arguments:
            settings -- user settings object passed through to the WebSite base class
            server -- FTP server host name
            user -- FTP login user name
            password -- FTP login password
            ftproot -- requested root directory on the FTP server
        """

        # Root directory of FTP server.
        self.root_dir = ftproot
        logging.debug(f"Requesting remote web site ftp root dir {self.root_dir:s}")

        # Connect to FTP server and log in.
        try:
            # self.ftp.set_debuglevel( 2 )
            self.ftp = ftplib.FTP(server)
            self.ftp.login(user, password)
        # Catch all exceptions with the parent class Exception: all built-in,
        # non-system-exiting exceptions are derived from this class.
        except Exception as detail:
            # Extract the string message from the exception class with str().
            logging.error(f"Remote web site cannot login to ftp server: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
        else:
            logging.debug("Remote web site ftp login succeeded.")

        logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")

        # Initialize the superclass last: its __init__ calls get_root_dir /
        # go_to_root_dir, which need the live ftp connection created above.
        WebSite.__init__(self, settings)
1608
1609 def go_to_root_dir(self, root_dir):
1610 """Go to the root directory"""
1611
1612 try:
1613 # Go to the root directory.
1614 self.ftp.cwd(root_dir)
1615 logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")
1616
1617 # Read it back.
1618 self.root_dir = self.ftp.pwd()
1619 logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")
1620
1621 except Exception as detail:
1622 logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
1623 raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
1624
1625 def get_root_dir(self):
1626 """Get the root directory name"""
1627
1628 return self.root_dir
1629
1630 def finish(self):
1631 """Quit remote web site"""
1632 logging.debug(f"Finished with WebSite object of class {type(self)}")
1633 try:
1634 self.ftp.quit()
1635 except Exception as detail:
1636 logging.error(f"Cannot ftp quit: {str(detail):s}")
1637
1638 def one_level_down(self, d):
1639 """List files and directories in a subdirectory using ftp"""
1640
1641 directories = []
1642 files = []
1643
1644 try:
1645 # ftp listing from current dir.
1646 logging.debug(f"RemoteWebSite.one_level_down(): \tftp cwd: {d:s}")
1647 self.ftp.cwd(d)
1648 dir_list = []
1649
1650 # Use the nonstandard -a option in LIST to show all the hidden .* files.
1651 # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
1652 # Note the second argument requires a callback function.
1653 self.ftp.retrlines('LIST -a', dir_list.append)
1654
1655 except Exception as detail:
1656 logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}: {str(detail):s} Aborting...")
1657 raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
1658
1659 for line in dir_list:
1660 logging.debug(f"RemoteWebSite.one_level_down(): \tftp LIST: {line:s}")
1661
1662 # Line should at least have the minimum FTP information.
1663 if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
1664 # Parse the FTP LIST and put the pieces into file_info.
1665 file_info = self.parse_ftp_list(line)
1666 logging.debug(f"RemoteWebSite.one_level_down(): \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")
1667
1668 # Skip over the UNIX hidden files for current and parent directories . and .. Also skip over any NULL file names.
1669 if file_info[self.user_settings.FILE_NAME] == "" or file_info[self.user_settings.FILE_NAME] == "." or file_info[self.user_settings.FILE_NAME] == "..":
1670 logging.debug(f"RemoteWebSite.one_level_down(): \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
1671 pass
1672 # For a directory, prefix the full path prefix from the root to the directory name and add to the directory list.
1673 elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
1674 dirname = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1675 logging.debug(f"RemoteWebSite.one_level_down(): \tftp dir (full path): {dirname:s}")
1676 directories.append(dirname)
1677 # For a file: Add the full path prefix from the root to the file name.
1678 else:
1679 file_info[self.user_settings.FILE_NAME] = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1680 logging.debug(f"RemoteWebSite.one_level_down(): \tftp file (full path):\
1681 {file_info[self.user_settings.FILE_NAME]:s}")
1682 files.append(file_info)
1683 else:
1684 logging.error(f"RemoteWebSite.one_level_down(): \tFTP LIST line is too short: {line:s}")
1685
1686 directories.sort()
1687 files.sort()
1688
1689 return directories, files
1690
1691 def modtime(self, f):
1692 """Get the modification time of a file via ftp. Return 0 if ftp cannot get it."""
1693 modtime = 0
1694
1695 try:
1696 response = self.ftp.sendcmd('MDTM ' + f)
1697 # MDTM returns the last modified time of the file in the format
1698 # "213 YYYYMMDDhhmmss \r\n <error-response>
1699 # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59.
1700 # error-response is 550 for info not available, and 500 or 501 if command cannot
1701 # be parsed.
1702 if response[:3] == '213':
1703 modtime = response[4:]
1704 except ftplib.error_perm as detail:
1705 logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Aborting...")
1706 modtime = 0
1707
1708 return modtime
1709
1710 def parse_ftp_list(self, line):
1711 """Parse the ftp file listing and return file name, datetime and file size.
1712
1713 An FTP LIST command will give output which looks like this for a file:
1714
1715 -rw-r--r-- 1 1000 free 4084 Jul 18 16:55 sparkCoil.png
1716
1717 and for a directory:
1718
1719 drwxr-xr-x 2 1000 free 4096 Jul 18 16:36 ReadingList
1720
1721 FTP uses UTC for its listings; the conversion to local time is done by the OS.
1722 We can have problems on New Year's Eve. For example, the local file date/time is
1723
1724 Mon Jan 1 06:23:12 2018
1725
1726 But the remote file date/time from FTP listing doesn't show a year even though we
1727 know it was written to the server in 2017.
1728
1729 Mon Dec 31 03:02:00
1730
1731 So we default the remote file year to current year 2018 and get
1732
1733 Mon Dec 31 03:02:00 2018
1734
1735 Now we think that the remote file is newer by 363.860278 days.
1736 """
1737
1738 # Find out if we've a directory or a file.
1739 if line[0] == 'd':
1740 dir_or_file = FileType.DIRECTORY
1741 else:
1742 dir_or_file = FileType.FILE
1743
1744 pattern = self.user_settings.FTP_LISTING
1745
1746 # Sensible defaults.
1747 filesize = 0
1748 filename = ""
1749 # Default the time to midnight.
1750 hour = 0
1751 minute = 0
1752 seconds = 0
1753 # Default the date to Jan 1 of the current year.
1754 month = 1
1755 day = 1
1756 year = WebSite.get_current_year()
1757
1758 # Extract time and date from the ftp listing.
1759 match = pattern.search(line)
1760
1761 if match:
1762 filesize = int(match.group('bytes'))
1763 month = self.user_settings.monthToNumber[match.group('mon')]
1764 day = int(match.group('day'))
1765
1766 # Remote file listing contains the year. The FTP listing will omit the hour and minute.
1767 if match.group('year'):
1768 year = int(match.group('year'))
1769 logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
1770 else:
1771 # Remote file listing omits the year. Default the year to the current UTC time year.
1772 # That may be incorrect (see comments above).
1773 year = WebSite.get_current_year()
1774 logging.debug(f"ftp is missing the year; use the current year = {year}")
1775
1776 # If the FTP listing has the hour and minute, it will omit the year.
1777 if match.group('hour') and match.group('min'):
1778 hour = int(match.group('hour'))
1779 minute = int(match.group('min'))
1780 logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")
1781
1782 filename = match.group('filename')
1783
1784 # Package up the time and date nicely.
1785 # Note if we didn't get any matches, we'll default the remote date and
1786 # time to Jan 1 midnight of the current year.
1787 d = datetime.datetime(year, month, day, hour, minute, seconds)
1788
1789 return [filename, dir_or_file, d, filesize]
1790
1791# ----------------------------------------------------------------------------
1792# Class for synchronizing local and remote web sites.
1793# ----------------------------------------------------------------------------
1794
class UpdateWeb(object):
    """Given previously scanned local and remote directories, update the remote website."""

    def __init__(
            self,
            settings,
            server,
            user,
            password,
            ftproot,
            file_size_limit,
            local_directory_list,
            local_file_info,
            remote_directory_list,
            remote_file_info):
        """Connect to remote site.  Accept previously scanned local and remote files and directories.

        Args:
            settings:              UserSettings object holding file/directory constants.
            server:                FTP server host name.
            user:                  FTP login name.
            password:              FTP login password.
            ftproot:               Root directory on the FTP server.
            file_size_limit:       Maximum upload size in KB (string or int).
            local_directory_list:  Directories found by the local scan.
            local_file_info:       Parsed file information from the local scan.
            remote_directory_list: Directories found by the remote scan.
            remote_file_info:      Parsed file information from the remote scan.

        Raises:
            UpdateWebException: if we cannot connect and log in.
        """

        self.user_settings = settings

        # Derived lists/maps; filled in by file_info() and changes().
        self.local_files_list = []
        self.remote_files_list = []
        self.local_file_to_size = {}
        self.local_file_to_date_time = {}
        self.remote_file_to_date_time = {}
        self.local_only_dirs = []
        self.local_only_files = []
        self.remote_only_dirs = []
        self.remote_only_files = []
        self.common_files = []

        # Connect to FTP server and log in.
        try:
            self.ftp = ftplib.FTP(server)
            self.ftp.login(user, password)
        except Exception as detail:
            logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
        else:
            logging.debug("ftp login succeeded.")

        logging.debug(f"ftp server welcome message: {self.ftp.getwelcome():s}")

        # Local root directory.
        self.local_root_dir = self.user_settings.local_root_dir
        logging.debug(f"Local root directory: {self.local_root_dir:s}")

        # Root directory of FTP server.
        self.ftp_root_dir = ftproot
        logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")

        # Transform KB string to integer bytes.  e.g. "200" => 2048000
        self.file_size_limit = int(file_size_limit) * 1024

        try:
            # Go to the root directory.
            self.ftp.cwd(self.ftp_root_dir)

            # Read it back.
            self.ftp_root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
        except Exception as detail:
            # NOTE(review): unlike RemoteWebSite.__init__, this failure is logged but NOT
            # re-raised, so construction continues with the requested root dir -- confirm intended.
            logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")

        self.local_directory_list = local_directory_list
        self.remote_directory_list = remote_directory_list
        self.local_file_info = local_file_info
        self.remote_file_info = remote_file_info

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a path"""

        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def file_info(self):
        """Create lists of file names from the file information.  Also create dictionaries which map file names onto
        dates, times, and sizes."""

        # Extract file names.
        self.local_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
        self.remote_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]

        # Use a dictionary comprehension to create key/value pairs,
        #     (file name, file date/time)
        # which map file names onto date/time.
        self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
        self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}

        # Dictionary comprehension creates a mapping of local file names onto file sizes.
        self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}

    def update(self):
        """Scan through the local website, cleaning it up.
        Go to remote website on my servers and synchronize all files."""

        self.file_info()

        # Which files and directories are different.
        self.changes()

        # Synchronize with the local web site.
        self.synchronize()

    def changes(self):
        """Find the set of different directories and files on local and remote."""

        # Add all directories which are only on local to the dictionary.
        dir_to_type = {
            d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}

        # Scan through all remote directories, adding those only on remote or
        # on both.
        for d in self.remote_directory_list:
            if d in dir_to_type:
                dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
            else:
                dir_to_type[d] = FileType.ON_REMOTE_ONLY

        # Add all files which are only on local to the dictionary.
        file_to_type = {
            f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}

        # Scan through all remote files, adding those only on remote or on
        # both.
        for f in self.remote_files_list:
            if f in file_to_type:
                file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
            else:
                file_to_type[f] = FileType.ON_REMOTE_ONLY

        logging.debug("Raw dictionary dump of directories")
        for k, v in dir_to_type.items():
            logging.debug(f"\t dir: {str(k):s} type: {str(v):s}")

        logging.debug("Raw dictionary dump of files")
        for k, v in file_to_type.items():
            logging.debug(f"\t file: {str(k):s} type: {str(v):s}")

        # List of directories only on local.  Keep the ordering.
        self.local_only_dirs = [
            d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]

        # List of directories only on remote.  Keep the ordering.
        self.remote_only_dirs = [
            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]

        # We don't care about common directories, only their changed files, if
        # any.

        # List of files only on local.  Keep the ordering.
        self.local_only_files = [
            f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]

        # List of files only on remote.  Keep the ordering.
        self.remote_only_files = [
            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]

        # List of common files on both local and remote.  Keep the ordering.
        self.common_files = [
            f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]

        logging.debug("*** Directories only on local ******************************")
        for d in self.local_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug("*** Directories only on remote ******************************")
        for d in self.remote_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug("*** Files only on local ******************************")
        for f in self.local_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug("*** Files only on remote ******************************")
        for f in self.remote_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug("*** Common files ******************************")
        for f in self.common_files:
            logging.debug(f"name {f:s}")
            logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
            logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")

    def synchronize(self):
        """Synchronize files and subdirectories in the remote directory with the local directory."""

        # If we have the same files in local and remote, compare their times
        # and dates.
        for f in self.common_files:
            local_file_time = self.local_file_to_date_time[f]
            remote_file_time = self.remote_file_to_date_time[f]

            # What's the time difference?
            time_delta = remote_file_time - local_file_time
            # How much difference, either earlier or later?
            seconds_different = abs(time_delta.total_seconds())
            minutes_different = seconds_different / 60.0
            hours_different = minutes_different / 60.0
            days_different = hours_different / 24.0

            # Assume no upload initially.
            upload_to_host = False

            logging.debug(f"Common file: {f:s}.")

            # Remote file time is newer.  (Long file names like the Xcode
            # project.xcworkspace paths below can exceed 200 characters.)

            if remote_file_time > local_file_time:
                # Remote file time is MUCH newer:  suspect time is out of joint on the server, so upload local local file to be safe.
                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
                    logging.error(f"Remote file {f:s} is MUCH newer[more than {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes] by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days. Upload the file to be safe.")
                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
                    logging.error(f"\tremote time {remote_file_time.ctime():s}")

                    # Set the local file to the current time.
                    full_file_name = self.append_root_dir(
                        self.local_root_dir, f)
                    if os.path.exists(full_file_name):
                        # Change the access and modify times of the file to the current time.
                        os.utime(full_file_name, None)
                        logging.error(f"Touching local file {full_file_name:s} to make it the current time")

                    upload_to_host = True
                # Remote file time is newer, but not by much.  Let's just assume a slight time mismatch on the server.  Don't upload.
                else:
                    logging.error(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            # Local file time is newer.
            elif local_file_time > remote_file_time:
                # Local file time slightly newer than the remote file.  So we are pretty sure the local file really got changed vs the server file.
                if days_different >= self.user_settings.DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
                    logging.warning(f"Local file {f:20s} is SLIGHTLY newer [more than {self.user_settings.DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD} days] by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days. Preparing for upload.")
                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = True
                else:
                    logging.debug(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            # Cancel the upload if the file is too big for the server.
            size = self.local_file_to_size[f]
            if size >= self.file_size_limit:
                logging.error(f"upload(): Skipping upload of file {f:s} of size {size:d};  too large for server, limit is {self.file_size_limit:d} bytes")
                upload_to_host = False

            # Finally do the file upload.
            if upload_to_host:
                logging.debug(f"Uploading changed file {f:s}")
                # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
                print(f"Uploading changed file {f:s}... ", end='', flush=True)
                self.upload(f)

        # Remote directory is not in local.  Delete it.
        for d in self.remote_only_dirs:
            logging.debug(f"Deleting remote only directory {d:s}")
            print(f"Deleting remote only directory {d:s}... ", end='', flush=True)
            self.rmdir(d)

        # Local directory missing on remote.  Create it.
        # Due to breadth first order scan, we'll create parent directories
        # before child directories.
        for d in self.local_only_dirs:
            logging.debug(f"Only on local. Creating new remote dir {d:s}.")
            print(f"Creating new remote directory {d:s}... ", end='', flush=True)
            self.mkdir(d)

        # Local file missing on remote.  Upload it.
        for f in self.local_only_files:
            logging.debug(f"Local only file. Uploading {f:s} to remote.")

            # But cancel the upload if the file is too big for the server.
            size = self.local_file_to_size[f]
            if size >= self.file_size_limit:
                logging.error(f"upload(): Skipping upload of file {f:s} of size {size:d};"
                              f" too large for server, limit is {self.file_size_limit:d} bytes")
            else:
                logging.debug(f"Uploading new file {f:s}")
                print(f"Uploading new file {f:s}... ", end='', flush=True)
                self.upload(f)

        # Remote contains a file not present on the local.  Delete the file.
        for f in self.remote_only_files:
            logging.debug(f"Remote only file. Deleting remote file {f:s}.")
            print(f"Deleting remote file {f:s}... ", end='', flush=True)
            self.del_remote(f)

    def del_remote(self, relative_file_path):
        """Delete a file using ftp.  Errors are logged, never raised."""

        logging.debug(f"del_remote(): \trelative file path name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"del_remote(): \tfile name: {file_name:s}")
        logging.debug(f"del_remote(): \trelative dir: {relative_dir:s}")
        logging.debug(f"del_remote(): \tremote root dir: {self.ftp_root_dir:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
            logging.debug(f"del_remote(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"del_remote(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"del_remote(): \tftp rm: {file_name:s}")

                # Don't remove zero length file names.
                if len(file_name) > 0:
                    self.ftp.delete(file_name)
                else:
                    # BUGFIX: this was a plain string with a misplaced 'f' prefix
                    # ("fdel_remote()..."), so {file_name:s} was logged literally.
                    logging.warning(
                        f"del_remote(): skipping ftp delete; file NAME {file_name:s} had zero length")
            except Exception as detail:
                logging.error(f"del_remote(): \tCannot ftp rm: {str(detail):s}")

    def mkdir(self, relative_dir):
        """Create new remote directory using ftp.  Errors are logged, never raised."""

        logging.debug(f"mkdir(): \trelative dir path name: {relative_dir:s}")
        logging.debug(f"mkdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Parse the relative dir path into prefix dir and suffix dir.
        path, d = os.path.split(relative_dir)
        logging.debug(f"mkdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"mkdir(): \tremote dir: {d:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"mkdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"mkdir(): \tCannot ftp chrdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"mkdir(): \tftp mkd: {d:s}")
                self.ftp.mkd(d)
            except Exception as detail:
                logging.error(f"mkdir(): \tCannot ftp mkdir: {str(detail):s}")

    def rmdir(self, relative_dir):
        """Delete an empty directory using ftp.  Errors are logged, never raised."""

        logging.debug(f"rmdir(): \tintermediate dir path name: {relative_dir:s}")
        logging.debug(f"rmdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Parse the relative dir path into prefix dir and suffix dir.
        path, d = os.path.split(relative_dir)
        logging.debug(f"rmdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"rmdir(): \tremote dir: {d:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"rmdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"rmdir(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"rmdir(): \tftp rmd: {d:s}")
                self.ftp.rmd(d)
            except Exception as detail:
                logging.error(f"rmdir(): \tCannot ftp rmdir dir {d:s}: {str(detail):s}. Directory is probably not empty. Do a manual delete.")

    def download(self, relative_file_path):
        """Download a binary file using ftp.

        NOTE(review): the actual ftp.retrbinary transfer is commented out, so
        this currently creates/truncates the local file but downloads nothing --
        confirm this stub is intentional."""

        logging.debug(f"download(): \tfile name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"download(): \tfile name: {file_name:s}")
        logging.debug(f"download(): \trelative dir: {relative_dir:s}")
        logging.debug(f"download(): \troot dir: {self.ftp_root_dir:s}")

        # Add the remote root path and go to the remote directory.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"download(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"download(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Add the local root path to get the local file name.
            # Open local binary file to write into.
            local_file_name = self.append_root_dir(
                self.local_root_dir, relative_file_path)
            logging.debug(f"download(): \topen local file name: {local_file_name:s}")
            try:
                f = open(local_file_name, "wb")
                try:
                    # Calls f.write() on each block of the binary file.
                    # ftp.retrbinary( "RETR " + file_name, f.write )
                    pass
                except Exception as detail:
                    logging.error(f"download(): \tCannot cannot ftp retrbinary: {str(detail):s}")
                f.close()
            except IOError as detail:
                logging.error(f"download(): \tCannot open local file {local_file_name:s} for reading: {str(detail):s}")

    def upload(self, relative_file_path):
        """Upload a binary file using ftp.  Errors are logged, never raised."""

        logging.debug(f"upload(): \trelative file path name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"upload(): \tfile name: {file_name:s}")
        logging.debug(f"upload(): \trelative dir: {relative_dir:s}")
        logging.debug(f"upload(): \tremote root dir: {self.ftp_root_dir:s}")

        # Add the remote root path and go to the remote directory.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"upload(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"upload(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Add the local root path to get the local file name.
            # Open local binary file to read from.
            local_file_name = self.append_root_dir(
                self.local_root_dir, relative_file_path)
            logging.debug(f"upload(): \topen local file name: {local_file_name:s}")

            try:
                f = open(local_file_name, "rb")
                try:
                    # f.read() is called on each block of the binary file until
                    # EOF.
                    logging.debug(f"upload(): \tftp STOR file {file_name:s}")
                    self.ftp.storbinary("STOR " + file_name, f)
                except Exception as detail:
                    logging.error(f"upload(): \tCannot ftp storbinary: {str(detail):s}")
                f.close()
            except IOError as detail:
                logging.error(f"upload(): \tCannot open local file {local_file_name:s} for reading: {str(detail):s}")

    def finish(self):
        """Log out of an ftp session"""
        logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit because {str(detail):s}")
2258
2259# ----------------------------------------------------------------------------
2260# Main function
2261# ----------------------------------------------------------------------------
2262
2263def main(raw_args=None):
2264 """Main program. Clean up and update my website."""
2265
2266 # Print the obligatory legal notice.
2267 print("""
2268 updateweb Version 7.1 - A Python utility program which maintains my web site.
2269 Copyright (C) 2007-2024 by Sean Erik O'Connor. All Rights Reserved.
2270
2271 It deletes temporary files, rewrites old copyright lines and email address
2272 lines in source files, then synchronizes all changes to my web sites.
2273
2274 updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
2275 GNU General Public License. This is free software, and you are welcome
2276 to redistribute it under certain conditions; see the GNU General Public
2277 License for details.
2278 """)
2279
2280 # Put ALL the main code into a try block!
2281 try:
2282 # ---------------------------------------------------------------------
2283 # Load default settings and start logging.
2284 # ---------------------------------------------------------------------
2285
2286 # Default user settings.
2287 user_settings = UserSettings()
2288
2289 print( f"Running main( {raw_args} ) Python version\
2290 {sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d}\
2291 local web directory\
2292 {user_settings.local_root_dir}\n")
2293 # Get command line options such as --verbose. Pass them back as flags in
2294 # user_settings.
2295 CommandLineSettings(user_settings, raw_args)
2296
2297 # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
2298 if user_settings.UNITTEST:
2299 suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
2300 unittest.TextTestRunner(verbosity=2).run(suite)
2301 # We are done!
2302 print(" ...done!", flush=True)
2303 return
2304
2305 # Start logging to file. Verbose turns on logging for
2306 # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels,
2307 # otherwise we log only WARNING, ERROR, and CRITICAL levels.
2308 if user_settings.VERBOSE:
2309 loglevel = logging.DEBUG
2310 else:
2311 loglevel = logging.WARNING
2312
2313 # Pick the log file name on the host.
2314 if user_settings.CLEAN:
2315 user_settings.LOGFILENAME = "/private/logLocal.txt"
2316 else:
2317 user_settings.LOGFILENAME = "/private/logRemote.txt"
2318
2319 # Default is to skip processing or uploading MathJax files in /mathjax to the server.
2320 if not user_settings.MATHJAX:
2321 user_settings.DIR_TO_SKIP += "|mathjax"
2322 else:
2323 print(f"Processing and uploading mathjax files. git restore any changed files and git clean -f to remove extra files... ", end='', flush=True)
2324 print(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually. If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box. This avoids ERROR 421 Too many connections... ", end='', flush=True)
2325 logging.debug(f"Processing and uploading mathjax files. git restore any changed files and git clean -f to remove extra files.")
2326 logging.debug(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually. If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box. This avoids ERROR 421 Too many connections...")
2327
2328 logging.basicConfig(
2329 level=loglevel,
2330 format='%(asctime)s %(levelname)-8s %(message)s',
2331 datefmt='%a, %d %b %Y %H:%M:%S',
2332 filename=user_settings.local_root_dir + user_settings.LOGFILENAME,
2333 filemode='w')
2334
2335 logging.debug("********** Begin logging")
2336
2337 # ---------------------------------------------------------------------
2338 # Scan the local website, finding out all files and directories.
2339 # ---------------------------------------------------------------------
2340
2341 # Suppress newline to keep the message to the console more compact. Flush output buffer, so we can see the message right away.
2342 print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}... ", end='', flush=True)
2343 logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")
2344
2345 local = LocalWebSite(user_settings)
2346 local.scan()
2347
2348 # ---------------------------------------------------------------------
2349 # Clean up local website.
2350 # ---------------------------------------------------------------------
2351
2352 # Clean up the directory by rewriting source code and hypertext and removing temporary files.
2353 print("Cleaning local web site... ", end='', flush=True)
2354 logging.debug("========================== Cleaning the local web site")
2355 local.clean()
2356
2357 # We are done with the first scan of the local web site and will dispose of it.
2358 local.finish()
2359 del local
2360
2361 # ---------------------------------------------------------------------
2362 # Rescan the local website since there will be changes to source
2363 # files from the clean up stage.
2364 # ---------------------------------------------------------------------
2365
2366 print(f"Rescan the local web site from root dir = {user_settings.local_root_dir}", end='', flush=True)
2367 logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")
2368
2369 local = LocalWebSite(user_settings)
2370
2371 local.scan()
2372
2373 # ---------------------------------------------------------------------
2374 # List all the local directories and files and their sizes.
2375 # ---------------------------------------------------------------------
2376
2377 # Local website directories.
2378 local_directory_list = local.directories
2379 logging.debug("********** List of all the Local Directories")
2380 for d in local_directory_list:
2381 logging.debug(f"\t {d:s}")
2382
2383 # Generate lists of the local website filenames only, and their sizes in bytes.
2384 local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
2385 total_number_of_files = len( local_files_name_size_pairs )
2386 logging.debug(f"********** List of all the Local Files from largest to smallest. There are {total_number_of_files:15d} files.")
2387 local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)
2388
2389 # Local website filenames only, and their dates and times.
2390 local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME],file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
2391 logging.debug(f"********** List of all Local Files Showing Their Date and Time")
2392 for file_datetime_pair in local_file_datetime_pairs:
2393 logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")
2394
2395 # Total number of bytes in the local files.
2396 total_number_of_bytes = 0
2397 for file_size_pair in local_files_name_size_pairs:
2398 logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
2399 total_number_of_bytes += file_size_pair[1]
2400 logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")
2401
2402 local.finish()
2403
2404 if user_settings.CLEAN:
2405 logging.debug("========================== Done with local file and directory cleanup...")
2406 del local
2407 print("...done!", flush=True)
2408 return
2409
2410 # ---------------------------------------------------------------------
2411 # Scan the remote hosted web site.
2412 # ---------------------------------------------------------------------
2413
2414 print("Scanning remote web site...", end='', flush=True)
2415 logging.debug("========================== Scanning the remote web site...")
2416
2417 # Pick which website to update.
2418 logging.debug("Connecting to primary remote site.")
2419 remote = RemoteWebSite(user_settings,
2420 user_settings.SERVER_NAME,
2421 user_settings.USER_NAME,
2422 user_settings.PASSWORD_NAME,
2423 user_settings.FTP_ROOT_NAME)
2424 remote.scan()
2425 remote.finish()
2426
2427 # ---------------------------------------------------------------------
2428 # List all the remote server directories and files and their sizes.
2429 # ---------------------------------------------------------------------
2430
2431 remote_directory_list = remote.directories
2432 logging.debug("********** Remote Directories")
2433 for d in remote_directory_list:
2434 logging.debug(f"\t {d:s}")
2435
    # Remote website filenames only, and their sizes in bytes.
2437 remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
2438 total_number_of_files = len( remote_files_name_size_list )
2439 logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
2440 remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
2441 total_number_of_bytes = 0
2442 for file_size in remote_files_name_size_list:
2443 logging.debug(f"\t {file_size[1]:10d} bytes {file_size[0]:s}")
2444 total_number_of_bytes += file_size[1]
2445 logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")
2446
2447 # ---------------------------------------------------------------------
2448 # Synchronize the local and remote web sites.
2449 # ---------------------------------------------------------------------
2450
2451 print("Synchronizing remote and local web sites...", end='', flush=True)
2452 logging.debug("========================= Synchronizing remote and local web sites...")
2453
2454 # Primary website.
2455 logging.debug("Connecting to primary remote site for synchronization.")
2456 sync = UpdateWeb(user_settings,
2457 user_settings.SERVER_NAME,
2458 user_settings.USER_NAME,
2459 user_settings.PASSWORD_NAME,
2460 user_settings.FTP_ROOT_NAME,
2461 user_settings.FILE_SIZE_LIMIT_NAME,
2462 local.directories,
2463 local.files,
2464 remote.directories,
2465 remote.files)
2466
2467 sync.update()
2468 sync.finish()
2469
2470 del sync
2471 del remote
2472 del local
2473 print("...done!", flush=True)
2474
2475 except UpdateWebException as detail:
2476 logging.error(f"Couldn't update the web directory: {str(detail):s}. Aborting...")
2477
2478 except RecursionError as detail:
2479 logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d} You can increase it with sys.setrecursionlimit(limit) {str(detail):s}. Aborting...")
2480
if __name__ == '__main__':
    # Python executes all code in this file. Finally, we come here.
    #
    # NOTE: a string literal here is NOT a docstring (docstrings attach only to
    # modules, classes, and functions per PEP 257); it would just be evaluated
    # and discarded at runtime, so the explanation lives in comments instead.
    #
    # * If we are executing this file as a standalone Python script,
    #   the name of the current module is set to __main__ and thus we'll
    #   call the main() function.
    #
    # * But if we are importing this code as a module, and calling it from
    #   another script, we will do this instead:
    #
    #       import updateweb
    #       updateweb.main(["--test"])
    main()