#!/usr/bin/python

import json
import time
import requests
import feedparser
import pymysql.cursors
import pymysql

from slugify import slugify
from datetime import date, datetime
from couchbase.cluster import Cluster, ClusterOptions
from couchbase_core.cluster import PasswordAuthenticator

# --- Couchbase -------------------------------------------------------------
# `cb_coll` (the default collection of the 'default' bucket) is used below as
# the store of fetched article documents, keyed by URL.
# NOTE(review): the credentials are hard-coded in source; consider moving them
# to configuration/environment and rotating them.
_cb_auth = PasswordAuthenticator('admin', 'Tanya@12')
_cb_options = ClusterOptions(_cb_auth)
cluster = Cluster('couchbase://docs.gigminds.com', _cb_options)
cb = cluster.bucket('default')
cb_coll = cb.default_collection()

# --- Endpoints -------------------------------------------------------------
# Embedly "extract" endpoint queried for every new feed link, and the RSS feed
# whose entries are processed.
baseUri = 'https://api.embedly.com/1/extract'
rss_feed = 'http://daily.gigminds.com/~api/papers/6bb81d38-63b2-4d78-9fea-49ae728b4ac8/rss'

# --- MySQL -----------------------------------------------------------------
# Connection used for inserting rows into `articles`; cursors return rows as
# dictionaries (DictCursor).
# NOTE(review): the database password is hard-coded in source as well.
_mysql_settings = {
    'host': "cldy-hub-db-prod-do-user-1524670-0.a.db.ondigitalocean.com",
    'user': "doadmin",
    'passwd': "jmbly6e4obtma5z0",
    'db': "defaultdb",
    'port': 25060,
    'charset': 'utf8mb4',
    'cursorclass': pymysql.cursors.DictCursor,
}
db = pymysql.connect(**_mysql_settings)


# ---------------------------------------------------------------------------
# Feed ingest.
#
# For every entry of the RSS feed whose link is not yet stored in Couchbase:
#   1. fetch the Embedly extract for the link,
#   2. store the returned document in Couchbase under its original URL,
#   3. pick a category from the article content,
#   4. insert one row into the MySQL `articles` table.
# ---------------------------------------------------------------------------

# Category labels in priority order: on equal counts the earliest label wins.
_CATEGORIES = ['sales cloud', 'covid-19', 'appexchange', 'developer',
               'service cloud', 'salesforce dx', 'integration', 'einstein',
               'ecommerce']
_DEFAULT_CATEGORY = "general"

# Characters treated as word separators before the text is tokenized.
_SEPARATORS = str.maketrans({ch: ' ' for ch in '-.,\n'})

_INSERT_ARTICLE_SQL = "insert into `articles` (`content`, `description`, `url`, `keywords`, `original_url`, `provider_url`, `provider_display`, `provider_name`, `title`, `image_1`, `image_2`, slug, publish_date_ms, publish_date, category_1) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"


def _tokenize(text):
    """Return *text* lower-cased and split into words on whitespace and '-.,'."""
    return text.translate(_SEPARATORS).lower().split()


# Each label is tokenized exactly like the article text so that multi-word
# labels ('sales cloud') and hyphenated ones ('covid-19') can actually match.
_CATEGORY_PHRASES = [(label, _tokenize(label)) for label in _CATEGORIES]


def _choose_category(content):
    """Return the category label occurring most often in *content*.

    A label counts once for every position where its words appear
    consecutively in the tokenized content.  Returns "general" when
    *content* is empty/None or no label occurs at all.
    """
    if not content:
        return _DEFAULT_CATEGORY

    words = _tokenize(content)
    best_label = _DEFAULT_CATEGORY
    best_count = 0
    for label, phrase in _CATEGORY_PHRASES:
        width = len(phrase)
        count = sum(
            1
            for start in range(len(words) - width + 1)
            if words[start:start + width] == phrase
        )
        # Strict '>' keeps the earliest label on ties.
        if count > best_count:
            best_label = label
            best_count = count
    return best_label


def _first_image_url(article):
    """Return the URL of the article's first image, or '' if it has none."""
    images = article.get('images') or []
    if not images:
        return ''
    return images[0].get('url') or ''


feed = feedparser.parse(rss_feed)
for entry in feed.entries:
    link = entry.link

    # Links already stored in Couchbase were handled by an earlier run.
    if cb_coll.get(link, quiet=True).success:
        print(link, ' - existing')
        continue

    print(link, ' - processing')
    # NOTE(review): the Embedly API key is hard-coded in source.
    options = {
        'key': 'c137f8b68c2c453cb635edd4f39c70a7',
        'url': link,
    }

    # Only the HTTP call can raise a requests exception, so only it is guarded.
    try:
        response = requests.get(baseUri, params=options, timeout=6)
    except requests.exceptions.Timeout:
        print("Timeout Error")
        continue
    except requests.exceptions.RequestException as err:
        print("Error: " + str(err))
        continue

    if response.status_code != requests.codes.ok:
        print("Bad status code " + str(response.status_code))
        continue

    # A malformed body must not abort the whole run; skip just this entry.
    try:
        article = json.loads(response.text)
    except ValueError as err:
        print("Error: invalid JSON response: " + str(err))
        continue

    article['type'] = 'article'
    article['id'] = article['original_url']
    article['created_at'] = time.ctime()
    # NOTE(review): the document is stored before the MySQL insert; if the
    # insert below fails, the link is still reported as 'existing' on the next
    # run and is not retried.
    cb_coll.upsert(article['id'], article)

    content = article.get('content')
    # Always computed, so an article without content gets "general" instead of
    # an undefined (or previous article's) category.
    final_cat = _choose_category(content)
    print('final:   ')
    print(final_cat)

    # 'published' is divided by 1000 before conversion, i.e. it is treated as
    # a millisecond timestamp; fall back to today's date when it is absent.
    ms_date = article.get('published')
    if ms_date:
        pub_date = datetime.fromtimestamp(ms_date // 1000.0)
    else:
        pub_date = date.today()
    print(pub_date)

    with db.cursor() as cursor:
        cursor.execute(_INSERT_ARTICLE_SQL, (
            content,
            article['description'],
            article['url'],
            '',
            article['original_url'],
            article['provider_url'],
            article['provider_display'],
            article['provider_name'],
            article['title'],
            _first_image_url(article),
            '',
            slugify(article['title']) + "_" + str(ms_date),
            ms_date,
            pub_date,
            final_cat,
        ))
        db.commit()

