#!/usr/bin/env python3

import json
import os
import time
import pymysql.cursors
import pymysql
from datetime import date
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

today = date.today()

# Connection settings are read from the environment (placeholder variable names)
# so production credentials are not hard-coded in the script.
db = pymysql.connect(host=os.environ["DB_HOST"],
                     user=os.environ["DB_USER"],
                     passwd=os.environ["DB_PASSWORD"],
                     db=os.environ["DB_NAME"],
                     port=int(os.environ.get("DB_PORT", "25060")),
                     charset='utf8mb4',
                     cursorclass=pymysql.cursors.DictCursor)

# Fetch the job description and resume used by the (commented-out) MODEL 1 check below.
with db.cursor() as cursor:
    sql = "SELECT `id`, `details` FROM jobs WHERE id = '5932'"
    cursor.execute(sql)
    records = cursor.fetchall()
    for record in records:
        job_description = record['details']

    sql = "SELECT `id`, `email`, `resume_details` from resumes where person_id = '3919'"
    cursor.execute(sql)
    records = cursor.fetchall()
    for record in records:
        resume = record['resume_details']

    ''' MODEL 1     
    text = [resume, job_description]
    
    cv = CountVectorizer()
    count_matrix = cv.fit_transform(text)

    #Print the similarity scores
    print("\nSimilarity Scores:")
    print(cosine_similarity(count_matrix))

    #get the match percentage
    matchPercentage = cosine_similarity(count_matrix)[0][1] * 100
    matchPercentage = round(matchPercentage, 2) # round to two decimal
    print("Your resume matches about "+ str(matchPercentage)+ "% of the job description.")
    '''
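
# A minimal sketch of the MODEL 1 idea kept above as a comment: vectorize the two
# texts as bag-of-words counts and report their cosine similarity as a match
# percentage. The function name is illustrative (not part of the original script);
# it reuses the CountVectorizer / cosine_similarity imports at the top of the file.
def bow_match_percentage(resume_text, job_text):
    """Return the bag-of-words cosine similarity of two texts as a rounded percentage."""
    count_matrix = CountVectorizer().fit_transform([resume_text, job_text])
    similarity = cosine_similarity(count_matrix)[0][1]
    return round(similarity * 100, 2)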


# https://github.com/visheshwar/Resume_recomendation-/blob/master/codes/search.py
import glob
import os
import warnings
# import textract
import requests
from flask import (Flask, json, Blueprint, jsonify, redirect, render_template, request,
                   url_for)
from gensim.summarization import summarize
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
#from werkzeug import secure_filename

from autocorrect import spell

warnings.filterwarnings(action='ignore', category=UserWarning, module='gensim')

import re, string, unicodedata
import nltk
import contractions
import inflect
from bs4 import BeautifulSoup
from nltk import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import LancasterStemmer, WordNetLemmatizer

# Corpora needed by word_tokenize, remove_stopwords and the WordNet lemmatizer below.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

def remove_non_ascii(words):
    """Remove non-ASCII characters from list of tokenized words"""
    new_words = []
    for word in words:
        new_word = unicodedata.normalize('NFKD', word).encode('ascii', 'ignore').decode('utf-8', 'ignore')
        new_words.append(new_word)
    return new_words

def to_lowercase(words):
    """Convert all characters to lowercase from list of tokenized words"""
    new_words = []
    for word in words:
        new_word = word.lower()
        new_words.append(new_word)
    return new_words

def remove_punctuation(words):
    """Remove punctuation from list of tokenized words"""
    new_words = []
    for word in words:
        new_word = re.sub(r'[^\w\s]', '', word)
        if new_word != '':
            new_words.append(new_word)
    return new_words

def replace_numbers(words):
    """Replace all interger occurrences in list of tokenized words with textual representation"""
    p = inflect.engine()
    new_words = []
    for word in words:
        if word.isdigit():
            new_word = p.number_to_words(word)
            new_words.append(new_word)
        else:
            new_words.append(word)
    return new_words
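
# Illustrative example: inflect turns digit tokens into words, e.g.
# inflect.engine().number_to_words("21") -> "twenty-one". This step is currently
# disabled in normalize() below.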

def remove_stopwords(words):
    """Remove stop words from list of tokenized words"""
    stop_words = set(stopwords.words('english'))
    new_words = []
    for word in words:
        if word not in stop_words:
            new_words.append(word)
    return new_words

def stem_words(words):
    """Stem words in list of tokenized words"""
    stemmer = LancasterStemmer()
    stems = []
    for word in words:
        stem = stemmer.stem(word)
        stems.append(stem)
    return stems

def lemmatize_verbs(words):
    """Lemmatize verbs in list of tokenized words"""
    lemmatizer = WordNetLemmatizer()
    lemmas = []
    for word in words:
        lemma = lemmatizer.lemmatize(word, pos='v')
        lemmas.append(lemma)
    return lemmas

def normalize(words):
    """Run the full cleaning pipeline over a list of tokenized words."""
    words = remove_non_ascii(words)
    words = to_lowercase(words)
    words = remove_punctuation(words)
    # words = replace_numbers(words)
    words = remove_stopwords(words)
    words = stem_words(words)
    words = lemmatize_verbs(words)
    return words
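
# Illustrative usage sketch: normalize() expects a list of tokens (for example the
# output of nltk.word_tokenize), not a raw string. Passing a raw string iterates it
# character by character, which is why the database loop further below tokenizes first:
#
#   tokens = nltk.word_tokenize("Senior Python developer with 5 years of Django")
#   cleaned = normalize(tokens)      # lowercased, punctuation/stopwords removed, stemmed
#   document = ' '.join(cleaned)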




def lcs(X, Y):
    """Return the length of the longest common contiguous run between sequences X and Y."""
    try:
        # mat[i][j] is the length of the matching run ending at X[i] and Y[j].
        mat = []
        for i in range(0, len(X)):
            row = []
            for j in range(0, len(Y)):
                if X[i] == Y[j]:
                    if i == 0 or j == 0:
                        row.append(1)
                    else:
                        row.append(1 + int(mat[i - 1][j - 1]))
                else:
                    row.append(0)
            mat.append(row)
        # The result is the largest entry anywhere in the matrix.
        best = 0
        for row in mat:
            if row and best < max(row):
                best = max(row)
        return best
    except Exception:
        # Defensive fallback; -9999 never beats a real match in semanticSearch().
        return -9999
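
# Illustrative example (hand-checked): lcs() compares element by element, so on two
# plain strings it measures the longest run of identical characters, e.g.
#
#   lcs("abcdef", "zabcy")   # -> 3, the shared run "abc"
#
# semanticSearch() below uses these character-run lengths as its similarity score.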

def spellCorrect(text):
    """Run autocorrect's spell() over each whitespace-separated word and rejoin."""
    words = text.split(" ")
    correctWords = []
    for word in words:
        correctWords.append(spell(word))
    return " ".join(correctWords)

def semanticSearch(searchString, searchSentencesList):
    """Return the candidate sentence with the longest common run against searchString.

    Every candidate's score is also appended to the global Final_Array.
    """
    result = None
    #searchString = spellCorrect(searchString)
    bestScore = 0
    for i in searchSentencesList:
        score = lcs(searchString, i)
        print(score, i[0:100])
        print("")
        Final_Array.append(score)
        if score > bestScore:
            bestScore = score
            result = i
    return result
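
# Illustrative usage sketch: given a preprocessed resume string and a list of
# preprocessed job descriptions, semanticSearch() returns the job text with the
# longest shared character run and appends every candidate's score to Final_Array:
#
#   best_job = semanticSearch(resumefile, [jobfile])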


Final_Array = []  # lcs scores collected by semanticSearch() for every candidate

# Re-fetch the job and resume, then preprocess both: tokenize, normalize, and
# re-join into a cleaned string for matching.
with db.cursor() as cursor:
    sql = "SELECT `id`, `details` FROM jobs WHERE id = '5932'"
    cursor.execute(sql)
    records = cursor.fetchall()
    for record in records:
        job_description = record['details']
        jobfile = nltk.word_tokenize(job_description)
        jobfile = normalize(jobfile)
        jobfile = ' '.join(map(str, jobfile))

    sql = "SELECT `id`, `email`, `resume_details` from resumes where person_id = '3919'"
    cursor.execute(sql)
    records = cursor.fetchall()
    for record in records:
        resume = record['resume_details']
        resumefile = nltk.word_tokenize(resume)
        resumefile = normalize(resume)
        resumefile = ' '.join(map(str, resume))
    
    # NOTE: skill_keyword_match is not defined or imported in this file; it is assumed
    # to come from an external keyword-matching helper (the commented-out lines below
    # also rely on its `config` and `location` objects). As written, this call raises
    # a NameError until that helper is wired in.
    skill_match = skill_keyword_match(jobfile)
    #skill_match.extract_jobs_keywords()
    #resume_skills = skill_match.extract_resume_keywords(config.SAMPLE_RESUME_PDF)
    #top_job_matches = skill_match.cal_similarity(resume_skills.index, location)
    #top_job_matches.to_csv(config.RECOMMENDED_JOBS_FILE+location+'.csv', index=False)

    print('File of recommended jobs saved')