Merge pull request #6 from VishwamAI/update-test-authenticator

Update Test Authenticator
VishwamAI · Oct 4, 2024 · 1f407c2 · 1f407c2
2 parents 75829fe + 83164ed
commit 1f407c2
Show file tree

Hide file tree

Showing 21 changed files with 4,007 additions and 0 deletions.
diff --git a/backend/app_config.py b/backend/app_config.py
@@ -0,0 +1,2 @@
+MINIMUM_LOG_LEVEL = "INFO"
+MINIMUM_WAIT_TIME = 5
diff --git a/backend/main.py b/backend/main.py
@@ -0,0 +1,235 @@
+import os
+import re
+import sys
+from pathlib import Path
+import yaml
+import click
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service as ChromeService
+from webdriver_manager.chrome import ChromeDriverManager
+from selenium.common.exceptions import WebDriverException
+from lib_resume_builder_AIHawk import Resume,StyleManager,FacadeManager,ResumeGenerator
+from src.utils import chrome_browser_options
+from src.llm.llm_manager import GPTAnswerer
+from src.aihawk_authenticator import AIHawkAuthenticator
+from src.aihawk_bot_facade import AIHawkBotFacade
+from src.aihawk_job_manager import AIHawkJobManager
+from src.job_application_profile import JobApplicationProfile
+from loguru import logger
+
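+# Suppress stderr: everything written to sys.stderr after this point is discarded,
+# including the tracebacks of uncaught exceptions.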
+sys.stderr = open(os.devnull, 'w')
+
+class ConfigError(Exception):
+    """Raised when a configuration or secrets file is missing, unreadable, or invalid."""
+
+class ConfigValidator:
+    """Static helpers that load and validate the YAML config and secrets files."""
+
+    @staticmethod
+    def validate_email(email: str) -> bool:
+        """Return True when ``email`` looks like ``local@domain.tld``."""
+        # fullmatch (rather than match + '$') also rejects a trailing newline.
+        return re.fullmatch(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', email) is not None
+
+    @staticmethod
+    def validate_yaml_file(yaml_path: Path) -> dict:
+        """Parse ``yaml_path`` with ``yaml.safe_load`` and return the result.
+
+        The result is whatever the document contains; an empty file yields None.
+
+        Raises:
+            ConfigError: if the file does not exist or is not valid YAML.
+        """
+        try:
+            with open(yaml_path, 'r', encoding='utf-8') as stream:
+                return yaml.safe_load(stream)
+        except yaml.YAMLError as exc:
+            raise ConfigError(f"Error reading file {yaml_path}: {exc}") from exc
+        except FileNotFoundError as exc:
+            raise ConfigError(f"File not found: {yaml_path}") from exc
+
+    @staticmethod
+    def validate_config(config_yaml_path: Path) -> dict:
+        """Load the search configuration and check every required setting.
+
+        Missing or null ``companyBlacklist`` / ``titleBlacklist`` entries are
+        normalised to empty lists; every other required key must be present
+        and have the expected type.
+
+        Returns:
+            The validated (and normalised) configuration mapping.
+
+        Raises:
+            ConfigError: if the file cannot be read or any setting is invalid.
+        """
+        parameters = ConfigValidator.validate_yaml_file(config_yaml_path)
+        # An empty YAML document loads as None; anything but a mapping is unusable.
+        if not isinstance(parameters, dict):
+            raise ConfigError(f"Config file {config_yaml_path} must contain a mapping of settings")
+
+        required_keys = {
+            'remote': bool,
+            'experienceLevel': dict,
+            'jobTypes': dict,
+            'date': dict,
+            'positions': list,
+            'locations': list,
+            'distance': int,
+            'companyBlacklist': list,
+            'titleBlacklist': list,
+            'llm_model_type': str,
+            'llm_model': str
+        }
+        optional_lists = ('companyBlacklist', 'titleBlacklist')
+
+        for key, expected_type in required_keys.items():
+            if key in optional_lists and parameters.get(key) is None:
+                # Absent or explicitly null blacklists default to "nothing blacklisted".
+                parameters[key] = []
+            elif key not in parameters:
+                raise ConfigError(f"Missing or invalid key '{key}' in config file {config_yaml_path}")
+            elif not isinstance(parameters[key], expected_type):
+                raise ConfigError(f"Invalid type for key '{key}' in config file {config_yaml_path}. Expected {expected_type}.")
+
+        # Each of these sections must map every listed option to a boolean.
+        boolean_sections = (
+            ('experienceLevel', 'Experience level',
+             ['internship', 'entry', 'associate', 'mid-senior level', 'director', 'executive']),
+            ('jobTypes', 'Job type',
+             ['full-time', 'contract', 'part-time', 'temporary', 'internship', 'other', 'volunteer']),
+            ('date', 'Date filter',
+             ['all time', 'month', 'week', '24 hours']),
+        )
+        for section, label, options in boolean_sections:
+            for option in options:
+                if not isinstance(parameters[section].get(option), bool):
+                    raise ConfigError(f"{label} '{option}' must be a boolean in config file {config_yaml_path}")
+
+        if not all(isinstance(pos, str) for pos in parameters['positions']):
+            raise ConfigError(f"'positions' must be a list of strings in config file {config_yaml_path}")
+        if not all(isinstance(loc, str) for loc in parameters['locations']):
+            raise ConfigError(f"'locations' must be a list of strings in config file {config_yaml_path}")
+
+        approved_distances = {0, 5, 10, 25, 50, 100}
+        if parameters['distance'] not in approved_distances:
+            raise ConfigError(f"Invalid distance value in config file {config_yaml_path}. Must be one of: {approved_distances}")
+
+        # The blacklists were already normalised to lists by the type loop above,
+        # so no further check is needed here.
+        return parameters
+
+    @staticmethod
+    def validate_secrets(secrets_yaml_path: Path) -> str:
+        """Load the secrets file and return its non-empty ``llm_api_key``.
+
+        Raises:
+            ConfigError: if the file cannot be read, is not a mapping, or the
+                key is missing or empty.
+        """
+        secrets = ConfigValidator.validate_yaml_file(secrets_yaml_path)
+        # An empty YAML document loads as None; anything but a mapping is unusable.
+        if not isinstance(secrets, dict):
+            raise ConfigError(f"Secrets file {secrets_yaml_path} must contain a mapping of secrets")
+        mandatory_secrets = ['llm_api_key']
+
+        for secret in mandatory_secrets:
+            if secret not in secrets:
+                raise ConfigError(f"Missing secret '{secret}' in file {secrets_yaml_path}")
+
+        if not secrets['llm_api_key']:
+            raise ConfigError(f"llm_api_key cannot be empty in secrets file {secrets_yaml_path}.")
+        return secrets['llm_api_key']
+
+class FileManager:
+    """Static helpers for locating and checking the files the bot needs."""
+
+    @staticmethod
+    def find_file(name_containing: str, with_extension: str, at_path: Path) -> Path | None:
+        """Return the first entry of ``at_path`` matching name and extension.
+
+        Both comparisons are case-insensitive. Returns None when no entry
+        matches; the search is not recursive.
+        """
+        wanted_name = name_containing.lower()
+        wanted_suffix = with_extension.lower()
+        for file in at_path.iterdir():
+            if wanted_name in file.name.lower() and file.suffix.lower() == wanted_suffix:
+                return file
+        return None
+
+    @staticmethod
+    def validate_data_folder(app_data_folder: Path) -> tuple:
+        """Check the data folder layout and make sure the output folder exists.
+
+        Returns:
+            ``(secrets.yaml, config.yaml, plain_text_resume.yaml, output)``
+            paths, all located inside ``app_data_folder``.
+
+        Raises:
+            FileNotFoundError: if the folder or any required file is missing.
+        """
+        if not app_data_folder.exists() or not app_data_folder.is_dir():
+            raise FileNotFoundError(f"Data folder not found: {app_data_folder}")
+
+        required_files = ['secrets.yaml', 'config.yaml', 'plain_text_resume.yaml']
+        missing_files = [file for file in required_files if not (app_data_folder / file).exists()]
+
+        if missing_files:
+            raise FileNotFoundError(f"Missing files in the data folder: {', '.join(missing_files)}")
+
+        output_folder = app_data_folder / 'output'
+        output_folder.mkdir(exist_ok=True)
+        return (app_data_folder / 'secrets.yaml', app_data_folder / 'config.yaml', app_data_folder / 'plain_text_resume.yaml', output_folder)
+
+    @staticmethod
+    def file_paths_to_dict(resume_file: Path | None, plain_text_resume_file: Path) -> dict:
+        """Build the uploads mapping from the resume paths.
+
+        The result always has ``'plainTextResume'``; ``'resume'`` is added only
+        when ``resume_file`` is given.
+
+        Raises:
+            FileNotFoundError: if any supplied path does not exist.
+        """
+        if not plain_text_resume_file.exists():
+            raise FileNotFoundError(f"Plain text resume file not found: {plain_text_resume_file}")
+
+        result = {'plainTextResume': plain_text_resume_file}
+
+        if resume_file:
+            if not resume_file.exists():
+                raise FileNotFoundError(f"Resume file not found: {resume_file}")
+            result['resume'] = resume_file
+
+        return result
+
+def init_browser() -> webdriver.Chrome:
+    """Start a Chrome WebDriver using the project's browser options.
+
+    The driver binary is resolved through ``ChromeDriverManager().install()``.
+
+    Raises:
+        RuntimeError: if the driver cannot be installed or started; the
+            original exception is attached as the cause.
+    """
+    try:
+        options = chrome_browser_options()
+        service = ChromeService(ChromeDriverManager().install())
+        return webdriver.Chrome(service=service, options=options)
+    except Exception as e:
+        # Chain the cause so the underlying driver error stays in the traceback.
+        raise RuntimeError(f"Failed to initialize browser: {str(e)}") from e
+
+def create_and_run_bot(parameters, llm_api_key):
+    """Wire up the resume generator, browser, and bot, then log in and apply.
+
+    Args:
+        parameters: validated configuration mapping; must contain
+            ``parameters['uploads']['plainTextResume']`` and may contain
+            ``'outputFileDirectory'``.
+        llm_api_key: API key passed to the resume facade and the GPT answerer.
+
+    Raises:
+        RuntimeError: for any failure other than a ``WebDriverException``
+            (which is only logged); the original exception is the cause.
+    """
+    browser = None
+    try:
+        logger.info("Initializing bot components...")
+        style_manager = StyleManager()
+        resume_generator = ResumeGenerator()
+        with open(parameters['uploads']['plainTextResume'], "r", encoding='utf-8') as file:
+            plain_text_resume = file.read()
+        resume_object = Resume(plain_text_resume)
+        # Prefer the output directory chosen by the caller; fall back to the
+        # previously hard-coded location when it is not provided.
+        output_directory = parameters.get('outputFileDirectory', Path("data_folder/output"))
+        resume_generator_manager = FacadeManager(llm_api_key, style_manager, resume_generator, resume_object, output_directory)
+        os.system('cls' if os.name == 'nt' else 'clear')
+
+        try:
+            logger.info("Choosing resume style...")
+            resume_generator_manager.choose_style()
+        except Exception as style_error:
+            logger.error(f"Error occurred while choosing resume style: {style_error}")
+            raise
+
+        os.system('cls' if os.name == 'nt' else 'clear')
+
+        logger.info("Creating job application profile...")
+        job_application_profile_object = JobApplicationProfile(plain_text_resume)
+
+        logger.info("Initializing browser and components...")
+        browser = init_browser()
+        login_component = AIHawkAuthenticator(browser)
+        apply_component = AIHawkJobManager(browser)
+        gpt_answerer_component = GPTAnswerer(parameters, llm_api_key)
+        bot = AIHawkBotFacade(login_component, apply_component)
+        bot.set_job_application_profile_and_resume(job_application_profile_object, resume_object)
+        bot.set_gpt_answerer_and_resume_generator(gpt_answerer_component, resume_generator_manager)
+        bot.set_parameters(parameters)
+
+        logger.info("Starting login process...")
+        bot.start_login()
+        logger.info("Starting job application process...")
+        bot.start_apply()
+    except WebDriverException as e:
+        logger.error(f"WebDriver error occurred: {e}")
+    except Exception as e:
+        logger.error(f"Unexpected error occurred: {e}")
+        raise RuntimeError(f"Error running the bot: {str(e)}") from e
+    finally:
+        # Always release the browser/driver process, even when the run fails.
+        if browser is not None:
+            browser.quit()
+
+
+@click.command()
+@click.option('--resume', type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path), help="Path to the resume PDF file")
+def main(resume: Path | None = None):
+    """Validate the files in ./data_folder and run the job application bot."""
+    try:
+        data_folder = Path("data_folder")
+        secrets_file, config_file, plain_text_resume_file, output_folder = FileManager.validate_data_folder(data_folder)
+
+        parameters = ConfigValidator.validate_config(config_file)
+        llm_api_key = ConfigValidator.validate_secrets(secrets_file)
+
+        parameters['uploads'] = FileManager.file_paths_to_dict(resume, plain_text_resume_file)
+        parameters['outputFileDirectory'] = output_folder
+
+        create_and_run_bot(parameters, llm_api_key)
+    except ConfigError as ce:
+        logger.error(f"Configuration error: {str(ce)}")
+        # The error text is already logged above; keep the URL clean so it stays a valid link.
+        logger.error("Refer to the configuration guide for troubleshooting: https://github.com/feder-cr/AIHawk_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
+    except FileNotFoundError as fnf:
+        logger.error(f"File not found: {str(fnf)}")
+        logger.error("Ensure all required files are present in the data folder.")
+        logger.error("Refer to the file setup guide: https://github.com/feder-cr/AIHawk_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
+    except RuntimeError as runtime_error:
+        # Named runtime_error (not "re") so the imported regex module is not shadowed.
+        logger.error(f"Runtime error: {str(runtime_error)}")
+        logger.error("Refer to the configuration and troubleshooting guide: https://github.com/feder-cr/AIHawk_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
+    except Exception as e:
+        logger.error(f"An unexpected error occurred: {str(e)}")
+        logger.error("Refer to the general troubleshooting guide: https://github.com/feder-cr/AIHawk_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/requirements.txt b/backend/requirements.txt
@@ -0,0 +1,30 @@
+click
+git+https://github.com/feder-cr/lib_resume_builder_AIHawk.git
+httpx~=0.27.2
+inputimeout==1.0.4
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+langchain==0.2.11
+langchain-anthropic
+langchain-huggingface
+langchain-community==0.2.10
+langchain-core==0.2.36
+langchain-google-genai==1.0.10
+langchain-ollama==0.1.3
+langchain-openai==0.1.17
+langchain-text-splitters==0.2.2
+langsmith==0.1.93
+Levenshtein==0.25.1
+loguru==0.7.2
+openai==1.37.1
+pdfminer.six==20221105
+pytest>=8.3.3
+python-dotenv~=1.0.1
+PyYAML~=6.0.2
+regex==2024.7.24
+reportlab==4.2.2
+selenium==4.9.1
+webdriver-manager==4.0.2
+pytest
+pytest-mock
+pytest-cov
diff --git a/backend/src/backend_integration.py b/backend/src/backend_integration.py
@@ -0,0 +1,25 @@
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+
+app = FastAPI()
+
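+# Configure CORS.
+# NOTE(review): allow_credentials=True is combined with wildcard origins, methods,
+# and headers below; list the allowed origins explicitly before exposing this API.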
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allows all origins
+    allow_credentials=True,
+    allow_methods=["*"],  # Allows all methods
+    allow_headers=["*"],  # Allows all headers
+)
+
+@app.get("/")
+async def root():
+    # Landing endpoint: returns a fixed welcome payload.
+    welcome = {"message": "Welcome to the Auto_Jobs_Applier_AIHawk API"}
+    return welcome
+
+@app.get("/health")
+async def health_check():
+    # Liveness endpoint: always reports a fixed "healthy" status.
+    status = {"status": "healthy"}
+    return status
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/backend/src/job.py b/backend/src/job.py
@@ -0,0 +1,48 @@
+from dataclasses import dataclass
+
+from loguru import logger
+
+
+@dataclass
+class Job:
+    """A single job posting together with data gathered while processing it."""
+
+    title: str
+    company: str
+    location: str
+    link: str
+    apply_method: str
+    description: str = ""
+    summarize_job_description: str = ""
+    pdf_path: str = ""
+    recruiter_link: str = ""
+
+    def set_summarize_job_description(self, summarize_job_description):
+        """Store the summarized job description, logging the new value."""
+        logger.debug(f"Setting summarized job description: {summarize_job_description}")
+        self.summarize_job_description = summarize_job_description
+
+    def set_job_description(self, description):
+        """Store the full job description, logging the new value."""
+        logger.debug(f"Setting job description: {description}")
+        self.description = description
+
+    def set_recruiter_link(self, recruiter_link):
+        """Store the recruiter profile link, logging the new value."""
+        logger.debug(f"Setting recruiter link: {recruiter_link}")
+        self.recruiter_link = recruiter_link
+
+    def formatted_job_information(self):
+        """
+        Build a markdown summary of the job (position, company, location,
+        recruiter profile, description) and return it with surrounding
+        whitespace stripped.
+        """
+        logger.debug(f"Formatting job information for job: {self.title} at {self.company}")
+        # The literal below is emitted verbatim, including its indentation.
+        markdown = f"""
+        # Job Description
+        ## Job Information 
+        - Position: {self.title}
+        - At: {self.company}
+        - Location: {self.location}
+        - Recruiter Profile: {self.recruiter_link or 'Not available'}
+        
+        ## Description
+        {self.description or 'No description provided.'}
+        """.strip()
+        logger.debug(f"Formatted job information: {markdown}")
+        return markdown
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		MINIMUM_LOG_LEVEL = "INFO"
		MINIMUM_WAIT_TIME = 5