xml from jobPosting Structured Data
import requests
from bs4 import BeautifulSoup
import json
# Base URL for the find-jobs section
base_url = 'https://smart-recruitments.framer.website/find-jobs/'
# Step 1: Load the main jobs page to find all job links
response = requests.get(base_url)
response.raise_for_status()
html_content = response.content
# Step 2: Parse the HTML with BeautifulSoup to find all job links
soup = BeautifulSoup(html_content, 'html.parser')
# Assuming job links are inside <a> tags with href pointing to the job details page
# (dict.fromkeys drops duplicate hrefs while preserving their order, so each posting
# appears only once in the feed)
job_links = list(dict.fromkeys(
    a['href'] for a in soup.find_all('a', href=True) if '/find-jobs/' in a['href']
))
# Prepare the base of the RSS feed
rss_feed = '''<?xml version="1.0" encoding="UTF-8" ?>
<source>
<publisher>SmartRecruitments</publisher>
<publisherurl>https://www.smart-recruitments.com</publisherurl>'''
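# The envelope above follows the common <source>/<job> XML job-feed layout (the
# style Indeed-type aggregators ingest). Each posting found below is appended as
# one <job> element, with values wrapped in CDATA so HTML in the descriptions
# survives intact.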
# Step 3: Iterate over each job link, fetch its content, and extract the JSON
for job_link in job_links:
    # Full URL for each job link (only the trailing slug is reused, so relative
    # and absolute hrefs are handled the same way)
    job_url = base_url + job_link.rstrip('/').split('/')[-1]
    # Fetch the job details page
    response = requests.get(job_url)
    response.raise_for_status()
    job_html_content = response.content
    # Parse the HTML with BeautifulSoup
    job_soup = BeautifulSoup(job_html_content, 'html.parser')
    # Locate the <script> tag containing the JobPosting JSON-LD
    script_tag = job_soup.find('script', type='application/ld+json')
    if script_tag:
        json_content = script_tag.string
        # Parse the JSON content
        data = json.loads(json_content)
        # Extract fields and append one <job> element to the feed
        rss_feed += f'''
<job>
<title><![CDATA[{data.get('title', 'No Title')}]]></title>
<date><![CDATA[{data.get('datePosted', 'No Date')}]]></date>
<referencenumber><![CDATA[{data.get('identifier', {}).get('value', 'No Reference Number')}]]></referencenumber>
<url><![CDATA[{job_url}]]></url>
<company><![CDATA[{data.get('hiringOrganization', {}).get('name', 'No Name Provided')}]]></company>
<city><![CDATA[{data.get('jobLocation', {}).get('address', {}).get('addressLocality', 'No City')}]]></city>
<state><![CDATA[{data.get('jobLocation', {}).get('address', {}).get('addressRegion', 'No State')}]]></state>
<country><![CDATA[{data.get('jobLocation', {}).get('address', {}).get('addressCountry', 'No Country')}]]></country>
<remote><![CDATA[{data.get('jobLocationType', 'No Remote Info')}]]></remote>
<postalcode><![CDATA[{data.get('jobLocation', {}).get('address', {}).get('postalCode', 'No Postal Code')}]]></postalcode>
<description><![CDATA[{data.get('description', 'No Description')}]]></description>
<jobtype><![CDATA[{data.get('employmentType', 'No Job Type')}]]></jobtype>
<category><![CDATA[{data.get('category', 'No Category')}]]></category>
</job>'''
# Close the RSS feed
rss_feed += '''
</source>'''
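# Optional sanity check (a sketch, not part of the original flow): parse the
# assembled feed so malformed XML is caught before it is written out. The feed
# string carries an encoding declaration, so it is encoded to bytes first; this
# also assumes no description contains a literal "]]>" sequence, which would
# terminate its CDATA section early.
import xml.etree.ElementTree as ET
try:
    ET.fromstring(rss_feed.encode('utf-8'))
except ET.ParseError as exc:
    print(f'Warning: generated feed is not well-formed XML: {exc}')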
# Save the combined feed to a file
with open('feed.xml', 'w', encoding='utf-8') as f:
    f.write(rss_feed)
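# For reference: the loop above assumes each job page embeds schema.org JobPosting
# structured data in a <script type="application/ld+json"> tag. A minimal,
# illustrative example of the shape the .get() chains expect (all values are
# hypothetical; "category" is not a standard JobPosting property and may be absent):
#
# {
#   "@context": "https://schema.org/",
#   "@type": "JobPosting",
#   "title": "Backend Engineer",
#   "datePosted": "2024-01-15",
#   "identifier": {"@type": "PropertyValue", "name": "SmartRecruitments", "value": "12345"},
#   "hiringOrganization": {"@type": "Organization", "name": "Example GmbH"},
#   "jobLocation": {
#     "@type": "Place",
#     "address": {
#       "@type": "PostalAddress",
#       "addressLocality": "Berlin",
#       "addressRegion": "BE",
#       "addressCountry": "DE",
#       "postalCode": "10115"
#     }
#   },
#   "jobLocationType": "TELECOMMUTE",
#   "employmentType": "FULL_TIME",
#   "description": "<p>Role description…</p>"
# }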