add jsfirm support (0.0.4)

author: msglm <msglm@techchud.xyz> 2026-05-10 20:32:35 -0500
committer: msglm <msglm@techchud.xyz> 2026-05-10 20:32:35 -0500
commit: 1db411b17407986627bd00b91c6d60dcaf4b1cea (patch)
tree: 57ae19f47f431faeaf691a2c7b0186cf1ebdea62
parent: a8fd261a82f3c748c3263fe5cffa0eba10b8a552 (diff)
download: getmeajob-0.0.4.tar.gz
getmeajob-0.0.4.tar.bz2
getmeajob-0.0.4.zip
5 files changed, 169 insertions, 36 deletions
diff --git a/getmeajob/getmeajob.py b/getmeajob/getmeajob.py
index cec1df1..7524637 100644
--- a/getmeajob/getmeajob.py
+++ b/getmeajob/getmeajob.py
@@ -8,7 +8,9 @@ from jobspy import scrape_jobs
 import argparse
 from fp.fp import FreeProxy
 from jinja2 import Template
-#from llama_cpp import Llama
+import getmeajob.scrapers.jobspy as jobspyscraper
+import getmeajob.scrapers.jsfirm as jsfirmscraper
+from llama_cpp import Llama
 
 template = Template("""
 {# jobs is a list of dicts with the fields you specified #}
@@ -89,6 +91,33 @@ template = Template("""
 </html>
                     """)
 
+def jobResults(site, listingEntry: dict) -> list:
+    """Scrape one job board and return its listings as a list of dicts.
+
+    site: one name from the listing's "sites" array; selects the scraper.
+    listingEntry: the config table for this search; it is not modified.
+
+    Raises SystemExit(1) after printing a message when no scraper handles site.
+    """
+    match site:
+        case ("indeed" | "linkedin" | "zip_recruiter" | "google"
+              | "glassdoor" | "bayt" | "naukri" | "bdjobs"):
+            # The jobspy scraper forwards listingEntry["sites"] as site_name,
+            # so narrow it to this one board: otherwise every call re-scrapes
+            # every configured site and also hands jobspy names such as
+            # "jsfirm" that belong to other scrapers.
+            return jobspyscraper.getJobs({**listingEntry, "sites": [site]})
+        case "jsfirm":
+            return jsfirmscraper.getJobs(listingEntry)
+        #case "aviationjobsearch":
+        #    return scrapers.aviationjobsearch.getJobs(listingEntry)
+        case _:
+            print("COULD NOT FIND SCRAPER FOR " + site + "!")
+            # exit() is a helper the site module adds for interactive use and
+            # may be absent; raise the SystemExit it would have raised.
+            raise SystemExit(1)
+    
+
 def main():
     parser = argparse.ArgumentParser(description='Get Me A Job!\n\nA CLI tool for scraping various sites and getting you a job. Outputs an HTML document.')
     parser.add_argument('config_file', type=str, help='The location of your getmeajob config file.')
@@ -97,45 +126,34 @@ def main():
     config = toml.load(open(args.config_file, 'rb'))
     for joblistingnames in config.keys():
 
-        listingentry = config[joblistingnames]
-
-        jobs = scrape_jobs(
-            site_name=listingentry["sites"],
-            search_term=listingentry["search"],
-            google_search_term=listingentry["search"] + " near " + listingentry["location"],
-            location=listingentry["location"],
-            results_wanted=listingentry["results_wanted"],
-            hours_old=listingentry["hours_old"],
-            country_indeed=listingentry["country"],
-            linkedin_fetch_description=True, # gets more info such as description, direct job url (slower)
-            proxies=[FreeProxy().get()] if listingentry["proxy"] else []
-        )
-
-        jobsdict = jobs.to_dict(orient='records')
+        listingEntry = config[joblistingnames]
+        
+        for site in listingEntry["sites"]: 
+            jobsdict = jobResults(site, listingEntry)
 
-        print(f"Found {len(jobs)} jobs")
+        print(f"Found {len(jobsdict)} jobs")
         for job in jobsdict:
             print(job)
 
-#        if len(listingentry["automated_questions"]) > 0:
-#
-#            qanda = []
-#
-#            llm = Llama.from_pretrained(repo_id="Mungert/Qwen3-4B-abliterated-GGUF",
-#                                        filename="*Q8_0.gguf",
-#                                        verbose=True
-#                                        )
-#            for question in listingentry["automated_questions"]:
-#	            qanda.append(llm.create_chat_completion(
-#	                    messages = [
-#	          {"role": "system", "content": "You are a summarizer tasked with summarizing job applications. Presented to you are the user's question about a job description and the description. Using only the provided description, answer the question to the best of your ability. If you are incapable of figuring out the answer, inform the user of this."},
-#	          {
-#	              "role": "user",
-#                  "content": "Job Summary: \n" + jobsdict["description"] + "\n\n Here is my question:\n " + question
-#	          }
-#              ]))
-#
-#            print(qanda)
+        if len(listingentry["automated_questions"]) > 0:
+
+            qanda = []
+
+            llm = Llama.from_pretrained(repo_id="Mungert/Qwen3-4B-abliterated-GGUF",
+                                        filename="*Q8_0.gguf",
+                                        verbose=True
+                                        )
+            for question in listingentry["automated_questions"]:
+                qanda.append(llm.create_chat_completion(
+                        messages = [
+              {"role": "system", "content": "You are a summarizer tasked with summarizing job applications. Presented to you are the user's question about a job description and the description. Using only the provided description, answer the question to the best of your ability. If you are incapable of figuring out the answer, inform the user of this."},
+              {
+                  "role": "user",
+                  "content": "Job Summary: \n" + jobsdict["description"] + "\n\n Here is my question:\n " + question
+              }
+              ]))
+
+            print(qanda)
 
 
         open(args.output_file, "w", encoding="utf-8").write(template.render(jobs=jobsdict))
diff --git a/getmeajob/scrapers/__init__.py b/getmeajob/scrapers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/getmeajob/scrapers/__init__.py
diff --git a/getmeajob/scrapers/jobspy.py b/getmeajob/scrapers/jobspy.py
new file mode 100644
index 0000000..5853baa
--- /dev/null
+++ b/getmeajob/scrapers/jobspy.py
@@ -0,0 +1,17 @@
+from jobspy import scrape_jobs
+
+def getJobs(listingEntry: dict) -> list:
+    """Run jobspy's scrape_jobs for listingEntry; return one dict per job row."""
+    # Local import: FreeProxy was never imported here (NameError when proxy is on).
+    from fp.fp import FreeProxy
+    jobs = scrape_jobs(
+        site_name=listingEntry["sites"],
+        search_term=listingEntry["search"],
+        google_search_term=listingEntry["search"] + " near " + listingEntry["location"],
+        location=listingEntry["location"],
+        results_wanted=listingEntry["results_wanted"], hours_old=listingEntry["hours_old"],
+        country_indeed=listingEntry["country"],
+        linkedin_fetch_description=True, # gets more info such as description, direct job url (slower)
+        proxies=[FreeProxy().get()] if listingEntry["proxy"] else [])
+    return jobs.to_dict(orient='records')
+
diff --git a/getmeajob/scrapers/jsfirm.py b/getmeajob/scrapers/jsfirm.py
new file mode 100644
index 0000000..7547afc
--- /dev/null
+++ b/getmeajob/scrapers/jsfirm.py
@@ -0,0 +1,97 @@
+import argparse
+import os
+import selenium
+import time
+import random
+import requests
+import tempfile
+import shutil
+import base64
+from time import sleep
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions
+from selenium.webdriver.chrome.service import Service
+from selenium import webdriver
+
+def extractLinksFromTbody(tbody):
+    """Return the job-page href of every row in tbody above the pager row."""
+    jobLinks = []
+    for row in tbody.find_elements(By.TAG_NAME, "tr"):
+        # get_property("class") is always None (the DOM property is className), so read the attribute.
+        if "pagination-ys" in (row.get_attribute("class") or "").split():
+            break
+        jobLinks.append(row.find_element(By.TAG_NAME, "td").find_elements(By.TAG_NAME, "a")[1].get_property("href"))
+    return jobLinks
+
+def gotoNextPage(tbody, pageToGoTo):
+    """Click the pager link labelled pageToGoTo in tbody; do nothing if there is none."""
+    from selenium.common.exceptions import NoSuchElementException
+    try:
+        nav = tbody.find_element(By.CLASS_NAME, "pagination-ys").find_element(By.TAG_NAME, "tr")
+    except NoSuchElementException:  # no pager found: nothing to click
+        return
+    for link in nav.find_elements(By.TAG_NAME, "a"):
+        if link.text.strip() == str(pageToGoTo):  # int(link.text) raised on non-numeric labels
+            link.click()
+            return  # stop here: the links found above may be stale after the click
+
+def getMaximumPages(tbody, placeToReturnTo, driver):
+    """Return the result-page count (1 if unknown); always reloads placeToReturnTo."""
+    try:
+        nav = tbody.find_element(By.CLASS_NAME, "pagination-ys").find_element(By.TAG_NAME, "tr")
+        # Was navLinks[navLinks.length - 1]: lists have no .length, so this always fell back to 1.
+        nav.find_elements(By.TAG_NAME, "a")[-1].click()
+        time.sleep(10)
+        # Read before the reload below leaves this page. NOTE(review): assumes the first <span> is the current page number.
+        return int(driver.find_element(By.XPATH, """//*[@id="ContentPlaceHolder3_ContentPlaceHolder3_gvJobs"]""").find_element(By.TAG_NAME, "tbody").find_element(By.TAG_NAME, "span").text)
+    except Exception:  # best effort: any lookup or parse failure is treated as a single page
+        time.sleep(10)
+        return 1
+    finally:
+        driver.get(placeToReturnTo)
+
+
+
+
+def getJobs(listingEntry: dict):
+    """Scrape jsfirm.com for listingEntry["search"] in listingEntry["location"].
+
+    Returns one dict per posting: job_url, company, company_url, title, location, description.
+    """
+    from urllib.parse import quote
+    # Escape the config values so characters such as "#" or "/" cannot change the URL's structure.
+    url = ("https://www.jsfirm.com/AllCategories/" + quote(listingEntry["search"], safe="")
+           + "/" + quote(listingEntry["location"], safe="") + "/searchquickjobs")
+    gridXpath = """//*[@id="ContentPlaceHolder3_ContentPlaceHolder3_gvJobs"]"""
+    driver = webdriver.Chrome()
+    try:  # the finally below closes the browser, which was previously never quit
+        driver.implicitly_wait(1.5)
+        driver.get(url)
+        time.sleep(10)
+        pageResults = driver.find_element(By.XPATH, gridXpath).find_element(By.TAG_NAME, "tbody")
+        lastPage = getMaximumPages(pageResults, url, driver)
+        jobLinks = []
+        # Read a page, then advance. Before, the first goto targeted page 1 itself and the last page was skipped.
+        for page in range(1, lastPage + 1):
+            pageResults = driver.find_element(By.XPATH, gridXpath).find_element(By.TAG_NAME, "tbody")
+            jobLinks += extractLinksFromTbody(pageResults)
+            if page < lastPage:
+                gotoNextPage(pageResults, page + 1)
+        #Actually read the jobs
+        jobsDictContainer = []
+        for job in jobLinks:
+            driver.get(job)
+            jobsDictContainer.append({
+                "job_url": job,
+                "company": driver.find_element(By.ID, "ContentPlaceHolder2_ContentPlaceHolder2_ucCompanyOverview_lblCompanyName").text,
+                # The href property is already an absolute URL; prefixing the host doubled it.
+                "company_url": driver.find_element(By.ID, "ContentPlaceHolder2_ContentPlaceHolder2_ucCompanyOverview_lblJobsCompany").find_element(By.TAG_NAME, "a").get_property("href"),
+                "title": driver.find_element(By.XPATH, "/html/body/form[1]/div[5]/div/div[2]/div/div[5]/div[1]/div[1]/div").text,
+                "location": driver.find_element(By.ID, "ContentPlaceHolder2_ContentPlaceHolder2_hLocation").get_property("value"),
+                "description": driver.find_element(By.ID, "ContentPlaceHolder2_ContentPlaceHolder2_ltlDescription").text})
+        return jobsDictContainer
+    finally:
+        driver.quit()
diff --git a/pyproject.toml b/pyproject.toml
index 2b22f7a..7780104 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
   "JobSpy @ git+https://github.com/speedyapply/JobSpy.git",
   "llama-cpp-python",
   "free-proxy",		
+  "selenium",
 ]
 
 [project.scripts]
author	msglm <msglm@techchud.xyz>	2026-05-10 20:32:35 -0500
committer	msglm <msglm@techchud.xyz>	2026-05-10 20:32:35 -0500
commit	1db411b17407986627bd00b91c6d60dcaf4b1cea (patch)
tree	57ae19f47f431faeaf691a2c7b0186cf1ebdea62
parent	a8fd261a82f3c748c3263fe5cffa0eba10b8a552 (diff)
download	getmeajob-0.0.4.tar.gz getmeajob-0.0.4.tar.bz2 getmeajob-0.0.4.zip