import pandas as pd

import re

from bs4 import BeautifulSoup


# Patterns are compiled once at import time rather than once per row.
_TAG_RE = re.compile(r'<.*?>')
_TAG_OR_ENTITY_RE = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});')
_NON_ALNUM_RE = re.compile(r"[^a-zA-Z0-9]+")


def _strip_markup(text):
    """Return *text* with HTML markup removed and punctuation collapsed.

    The value is parsed with BeautifulSoup to extract its text, any
    leftover tag-like or entity-like sequences are deleted, and every run
    of characters other than ASCII letters and digits is replaced by a
    single space.

    Missing values (NaN/None, which is how ``pd.read_csv`` represents an
    empty cell) are returned as an empty string instead of being passed to
    the HTML parser.
    """
    if pd.isna(text):
        return ''
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    text = BeautifulSoup(str(text), 'html.parser').get_text()
    # Two passes are kept on purpose: removing a tag can join the text on
    # either side of it into a new entity-like sequence.
    text = _TAG_RE.sub('', text)
    text = _TAG_OR_ENTITY_RE.sub('', text)
    return _NON_ALNUM_RE.sub(' ', text)


def _clean_csv(src, dst, text_column, columns):
    """Read *src*, clean *text_column*, and write *columns* to *dst*.

    Only the listed columns are kept, in the given order. The output is
    written with ``DataFrame.to_csv`` defaults, so the row index is
    included as the first, unnamed column.

    Raises:
        KeyError: if any of *columns* is missing from the input file.
    """
    frame = pd.read_csv(src)
    cleaned = frame[columns].copy()
    # Assign the whole column at once. Writing through ``frame.iloc[i][col]``
    # modifies a temporary row object and the result is silently lost.
    cleaned[text_column] = cleaned[text_column].map(_strip_markup)
    cleaned.to_csv(dst)


def clean_jobs_resume_csv():
    """Clean the free-text columns of the job and resume CSV files.

    Reads ``jobs_des.csv`` and ``resumes_des.csv`` from the current working
    directory, strips markup from the ``details`` and ``resume_details``
    columns respectively, and writes the selected columns to
    ``clean_jobs.csv`` and ``clean_resumes.csv``.

    Takes no arguments and returns ``None``; the output files are the only
    result.
    """
    # read jobs_des.csv to clean job details
    _clean_csv(
        "jobs_des.csv",
        "clean_jobs.csv",
        'details',
        ['id', 'title', 'details', 'city', 'state_province'],
    )

    # read resumes_des.csv to clean resume details
    _clean_csv(
        "resumes_des.csv",
        "clean_resumes.csv",
        'resume_details',
        ['resume_id', 'person_id', 'title', 'resume_details', 'city', 'state_province'],
    )
