Commit c3523219 authored by Jacob's avatar Jacob
Browse files

Modified code to satisfy code review for MR

parent 53d58f1d
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
This module defines a data class 'ErrorCodes' that instantiates various error codes used throughout the application.
It categorizes error codes into distinct sections for database operations, scraping processes,
templating issues, and provides a default error code for general use. Each error type is associated with specific
integer values, making it easier to manage and identify errors consistently across different components of the application.
'''
from dataclasses import dataclass


+7 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
This module defines custom exception classes for various application components. Each exception class validates
specific error codes from 'ErrorCodes'. Included are 'ScraperException', 'DatabaseException', 'TemplateException', and 'ParserError',
each tailored to a specific component and type of error within the application.
'''
from common.error_codes import ErrorCodes


src/common/scrapers/scripter.py

deleted100755 → 0
+0 −25
Original line number Diff line number Diff line
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Provide a script handler method which can process sequentially, parallel."""
import os
from pathlib import Path
from common.logz import create_logger


def get_all_scripts(script_dir, logger=None):
    """Retrieve all script files (*.R, *.py, *.sh, *.pl) in a directory.

    Args:
        script_dir: Path (str or os.PathLike) of the directory to scan.
        logger: Optional logger instance. Defaults to a lazily created
            application logger. (The original evaluated ``create_logger()``
            in the def-line default, creating one shared logger at import
            time; ``None`` + lazy creation avoids that side effect.)

    Returns:
        list[str]: Absolute, resolved paths of every matching script;
        an empty list when ``script_dir`` does not exist or is not a
        directory.
    """
    if logger is None:
        logger = create_logger()

    script_path = Path(script_dir)
    if not script_path.is_dir():
        # Bail out early: the original logged then fell through to
        # os.listdir(), which raised on a missing directory.
        logger.error(f"{script_dir} is not a directory or does not exist.")
        return []

    logger.info(f"looking for scripts in {script_dir}")
    logger.debug(f"Directory contents: {os.listdir(script_dir)}")

    scripts = []
    for pattern in ("*.R", "*.py", "*.sh", "*.pl"):
        scripts.extend(script_path.glob(pattern))
    # x.resolve() must be *called* -- the original returned the repr of the
    # bound method ("<bound method Path.resolve ...>") instead of a path.
    return [str(x.resolve()) for x in scripts]
+4 −3
Original line number Diff line number Diff line
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Provide static objects."""
from dataclasses import dataclass
import re


@dataclass
class Colors:
    """Provide a dataclass of colors for pretty printing."""

@@ -17,7 +18,7 @@ class Colors:
    WHITE = "\33[37m"
    RESET = "\33[39m"


@dataclass
class UrlRegex:
    """Define a URL Regex."""

@@ -31,7 +32,7 @@ class UrlRegex:
        + r"([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*"
    )


@dataclass
class ImageConfig:
    """Static dictionaries and values for OCR of images."""

+1 −87
Original line number Diff line number Diff line
@@ -336,89 +336,3 @@ class WebDriver:
            raise ParserError(msg)
        except Exception as e:
            self.logger.error(f"Unknown exception while waiting for element: " f"{e}")
 No newline at end of file


if __name__ == "__main__":
    import time
    from bs4 import BeautifulSoup as Soup
    from selenium.webdriver.common.action_chains import ActionChains
    from selenium.webdriver.support.ui import Select

    test_url = "http://www.example.com"
    print("Testing if requests work")
    with WebDriver(url=test_url, driver="curl") as d:
        source = d.dump_out()
    print(source)
    with WebDriver(
        url=test_url,
        driver="chromedriver",
        options=[
            "--no-sandbox",
            "--disable-gpu",
            "--disable-logging",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--no-zygote",
            "headless",
        ],
        service_args=["--ignore-ssl-errors=true", "--ssl-protocol=any"],
    ) as d:
        source = d.driver.page_source
    print(source)
    print("Running example EI ID 182")
    url_182 = "https://ebill.kcelectric.coop/woViewer/mapviewer.html?"
    url_182 = url_182 + "config=Outage+Web+Map"
    chrome_opts = [
        "--no-sandbox",
        "--disable-gpu",
        "--disable-logging",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--no-zygote",
        "headless",
    ]
    service_args = ["--ignore-ssl-errors=true", "--ssl-protocol=any"]
    with WebDriver(
        url=url_182,
        driver="chromedriver",
        options=chrome_opts,
        service_args=service_args,
    ) as d:
        xpath = '//div[@id="OMS.Customers Summary"]'
        # d.wait_for_element(xpath, 'xpath')
        # arget = d.driver.find_element_by_xpath(xpath)
        target = d.get_xpath(xpath)
        ActionChains(d.driver).move_to_element(target).click(target).perform()
        d.wait_for_element("select", "tag")
        select = Select(d.driver.find_element_by_tag_name("select"))
        time.sleep(2)
        select.select_by_visible_text("County")
        source = d.driver.page_source
    soup = Soup(source, "html.parser")
    table = soup.findAll("table", {"class": "GNBU0IVDGE summary-table"})
    rows = table[0].find_all("td")
    regions = []
    custs_out = []
    custs_served = []
    for row in rows:
        if "summary-region-column" in str(row):
            regions.append(
                row.get_text()
                .replace(" County", "")
                .replace(" COUNTY", "")
                .strip()
                .replace("ST ", "ST. ")
            )
        elif "summary-number-out-column" in str(row):
            custs_out.append(row.get_text())
        elif (
            "summary-number-served-column" in str(row)
            and "GMFGE5DLD" not in str(row)
            and "%" not in row.get_text()
        ):
            custs_served.append(row.get_text())
        else:
            pass
    print("Regions found: %s" % regions)
    print("Customers out: %s" % custs_out)
    print("Custs served: %s" % custs_served)