Loading src/common/error_codes.py +8 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- ''' This module defines a data class 'ErrorCodes' that instantiates various error codes used throughout the application. It categorizes error codes into distinct sections for database operations, scraping processes, templating issues, and provides a default error code for general use. Each error type is associated with specific integer values, making it easier to manage and identify errors consistently across different components of the application. ''' from dataclasses import dataclass Loading src/common/exceptions.py +7 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- ''' This module defines custom exception classes for various application components. Each exception class validates specific error codes from 'ErrorCodes'. Included are 'ScraperException', 'DatabaseException', 'TemplateException', and 'ParserError', each tailored to a specific component and type of error within the application. 
''' from common.error_codes import ErrorCodes Loading src/common/scrapers/scripter.pydeleted 100755 → 0 +0 −25 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- """Provide a script handler method which can process sequentially, parallel.""" import os from pathlib import Path from common.logz import create_logger def get_all_scripts(script_dir, logger=create_logger()): """Retrieve all scripts in the specified directory.""" r_scripts = [] py_scripts = [] sh_scripts = [] script_path = Path(script_dir) if not script_path.is_dir(): logger.error(f"{script_dir} is not a directory or does not exist.") pass logger.info(f"looking for scripts in {script_dir}") logger.debug(f"Directory contents: {os.listdir(script_dir)}") r_scripts = list(script_path.glob("*.R")) py_scripts = list(script_path.glob("*.py")) sh_scripts = list(script_path.glob("*.sh")) pl_scripts = list(script_path.glob("*.pl")) return [str(x.resolve) for x in r_scripts + py_scripts + sh_scripts + pl_scripts] src/common/scrapers/static.py +4 −3 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- """Provide static objects.""" from dataclasses import dataclass import re @dataclass class Colors: """Provide a dataclass of colors for pretty printing.""" Loading @@ -17,7 +18,7 @@ class Colors: WHITE = "\33[37m" RESET = "\33[39m" @dataclass class UrlRegex: """Define a URL Regex.""" Loading @@ -31,7 +32,7 @@ class UrlRegex: + r"([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*" ) @dataclass class ImageConfig: """Static dictionaries and values for OCR of images.""" Loading src/common/scrapers/webdriver.py +1 −87 Original line number Diff line number Diff line Loading @@ -336,89 +336,3 @@ class WebDriver: raise ParserError(msg) except Exception as e: self.logger.error(f"Unknown exception while waiting for element: " f"{e}") No newline at end of file if __name__ == "__main__": import time from bs4 import BeautifulSoup as Soup from 
selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.support.ui import Select test_url = "http://www.example.com" print("Testing if requests work") with WebDriver(url=test_url, driver="curl") as d: source = d.dump_out() print(source) with WebDriver( url=test_url, driver="chromedriver", options=[ "--no-sandbox", "--disable-gpu", "--disable-logging", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--no-zygote", "headless", ], service_args=["--ignore-ssl-errors=true", "--ssl-protocol=any"], ) as d: source = d.driver.page_source print(source) print("Running example EI ID 182") url_182 = "https://ebill.kcelectric.coop/woViewer/mapviewer.html?" url_182 = url_182 + "config=Outage+Web+Map" chrome_opts = [ "--no-sandbox", "--disable-gpu", "--disable-logging", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--no-zygote", "headless", ] service_args = ["--ignore-ssl-errors=true", "--ssl-protocol=any"] with WebDriver( url=url_182, driver="chromedriver", options=chrome_opts, service_args=service_args, ) as d: xpath = '//div[@id="OMS.Customers Summary"]' # d.wait_for_element(xpath, 'xpath') # arget = d.driver.find_element_by_xpath(xpath) target = d.get_xpath(xpath) ActionChains(d.driver).move_to_element(target).click(target).perform() d.wait_for_element("select", "tag") select = Select(d.driver.find_element_by_tag_name("select")) time.sleep(2) select.select_by_visible_text("County") source = d.driver.page_source soup = Soup(source, "html.parser") table = soup.findAll("table", {"class": "GNBU0IVDGE summary-table"}) rows = table[0].find_all("td") regions = [] custs_out = [] custs_served = [] for row in rows: if "summary-region-column" in str(row): regions.append( row.get_text() .replace(" County", "") .replace(" COUNTY", "") .strip() .replace("ST ", "ST. 
") ) elif "summary-number-out-column" in str(row): custs_out.append(row.get_text()) elif ( "summary-number-served-column" in str(row) and "GMFGE5DLD" not in str(row) and "%" not in row.get_text() ): custs_served.append(row.get_text()) else: pass print("Regions found: %s" % regions) print("Customers out: %s" % custs_out) print("Custs served: %s" % custs_served) Loading
src/common/error_codes.py +8 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- ''' This module defines a data class 'ErrorCodes' that instantiates various error codes used throughout the application. It categorizes error codes into distinct sections for database operations, scraping processes, templating issues, and provides a default error code for general use. Each error type is associated with specific integer values, making it easier to manage and identify errors consistently across different components of the application. ''' from dataclasses import dataclass Loading
src/common/exceptions.py +7 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- ''' This module defines custom exception classes for various application components. Each exception class validates specific error codes from 'ErrorCodes'. Included are 'ScraperException', 'DatabaseException', 'TemplateException', and 'ParserError', each tailored to a specific component and type of error within the application. ''' from common.error_codes import ErrorCodes Loading
def get_all_scripts(script_dir, logger=None):
    """Retrieve all R, Python, shell, and Perl scripts in a directory.

    The search is non-recursive and matches the extensions
    ``*.R``, ``*.py``, ``*.sh``, and ``*.pl``.

    Args:
        script_dir: Path (str or os.PathLike) of the directory to search.
        logger: Optional logger. Defaults to ``create_logger()``; the
            default is now created lazily inside the call instead of at
            function-definition time, so importing this module no longer
            builds a logger as a side effect.

    Returns:
        list[str]: Absolute resolved paths of the matching scripts, or an
        empty list when *script_dir* is not an existing directory.
    """
    if logger is None:
        logger = create_logger()

    script_path = Path(script_dir)
    if not script_path.is_dir():
        logger.error(f"{script_dir} is not a directory or does not exist.")
        # Original fell through and crashed in os.listdir(); return the
        # documented empty result instead.
        return []

    logger.info(f"looking for scripts in {script_dir}")
    logger.debug(f"Directory contents: {os.listdir(script_dir)}")

    scripts = []
    for pattern in ("*.R", "*.py", "*.sh", "*.pl"):
        scripts.extend(script_path.glob(pattern))

    # BUG FIX: resolve() must be *called* -- the original used
    # ``str(x.resolve)``, which stringified the bound method object
    # (e.g. "<bound method Path.resolve ...>") instead of the path.
    return [str(x.resolve()) for x in scripts]
src/common/scrapers/static.py +4 −3 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- """Provide static objects.""" from dataclasses import dataclass import re @dataclass class Colors: """Provide a dataclass of colors for pretty printing.""" Loading @@ -17,7 +18,7 @@ class Colors: WHITE = "\33[37m" RESET = "\33[39m" @dataclass class UrlRegex: """Define a URL Regex.""" Loading @@ -31,7 +32,7 @@ class UrlRegex: + r"([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*" ) @dataclass class ImageConfig: """Static dictionaries and values for OCR of images.""" Loading
if __name__ == "__main__":
    # Manual smoke test / worked example for the WebDriver wrapper.
    # Requires network access, a chromedriver binary, bs4, and selenium;
    # it is intentionally not part of the automated test suite.
    import time

    from bs4 import BeautifulSoup as Soup
    from selenium.webdriver.common.action_chains import ActionChains
    from selenium.webdriver.support.ui import Select

    test_url = "http://www.example.com"

    # 1) Plain HTTP fetch through the "curl" driver backend.
    print("Testing if requests work")
    with WebDriver(url=test_url, driver="curl") as d:
        source = d.dump_out()
        print(source)

    # 2) Same page through headless Chrome to confirm the selenium backend.
    with WebDriver(
        url=test_url,
        driver="chromedriver",
        options=[
            "--no-sandbox",
            "--disable-gpu",
            "--disable-logging",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--no-zygote",
            "headless",
        ],
        service_args=["--ignore-ssl-errors=true", "--ssl-protocol=any"],
    ) as d:
        source = d.driver.page_source
        print(source)

    # 3) Real-world example: scrape the outage summary table from the
    # KC Electric outage map (internal example id 182).
    print("Running example EI ID 182")
    url_182 = "https://ebill.kcelectric.coop/woViewer/mapviewer.html?"
    url_182 = url_182 + "config=Outage+Web+Map"
    chrome_opts = [
        "--no-sandbox",
        "--disable-gpu",
        "--disable-logging",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--no-zygote",
        "headless",
    ]
    service_args = ["--ignore-ssl-errors=true", "--ssl-protocol=any"]
    with WebDriver(
        url=url_182,
        driver="chromedriver",
        options=chrome_opts,
        service_args=service_args,
    ) as d:
        # Click the "Customers Summary" panel, then switch the summary
        # grouping to "County" via the page's <select> control.
        xpath = '//div[@id="OMS.Customers Summary"]'
        # d.wait_for_element(xpath, 'xpath')
        # target = d.driver.find_element_by_xpath(xpath)
        target = d.get_xpath(xpath)
        ActionChains(d.driver).move_to_element(target).click(target).perform()
        d.wait_for_element("select", "tag")
        # NOTE(review): find_element_by_tag_name was removed in Selenium 4;
        # this example needs Selenium 3.x (or a port to
        # driver.find_element(By.TAG_NAME, ...)) -- confirm pinned version.
        select = Select(d.driver.find_element_by_tag_name("select"))
        time.sleep(2)  # give the widget time to re-render after the click
        select.select_by_visible_text("County")
        source = d.driver.page_source
        soup = Soup(source, "html.parser")
        # GWT-generated class names ("GNBU0IVDGE", "GMFGE5DLD") are
        # build-specific and may change when the site is redeployed.
        table = soup.findAll("table", {"class": "GNBU0IVDGE summary-table"})
        rows = table[0].find_all("td")
        regions = []
        custs_out = []
        custs_served = []
        for row in rows:
            if "summary-region-column" in str(row):
                # Normalize county names: strip the "County" suffix and
                # restore the period in abbreviations like "ST. ...".
                regions.append(
                    row.get_text()
                    .replace(" County", "")
                    .replace(" COUNTY", "")
                    .strip()
                    .replace("ST ", "ST. ")
                )
            elif "summary-number-out-column" in str(row):
                custs_out.append(row.get_text())
            elif (
                "summary-number-served-column" in str(row)
                and "GMFGE5DLD" not in str(row)
                and "%" not in row.get_text()
            ):
                # Keep only raw served counts; skip styled cells and
                # percentage cells.
                custs_served.append(row.get_text())
            else:
                pass
        print("Regions found: %s" % regions)
        print("Customers out: %s" % custs_out)
        print("Custs served: %s" % custs_served)