File size: 3,123 Bytes
a4656ff
 
 
 
 
 
 
 
d66ef5a
 
 
 
 
a4656ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
---
license: apache-2.0
metrics:
- accuracy
pipeline_tag: text-classification
tags:
- cnn
- amazon_reviews
datasets:
- yassiracharki/Amazon_Reviews_Binary_for_Sentiment_Analysis
language:
- en
library_name: keras
---
# Model Card: Binary CNN for Sentiment Analysis on Amazon Reviews

# Downloads
!pip install contractions
!pip install textsearch
!pip install tqdm

import nltk
nltk.download('punkt')

# Fundamental classes
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np

# Time
import time
import datetime

# Preprocessing
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from sklearn.preprocessing import LabelEncoder
import contractions
from bs4 import BeautifulSoup
import re
import tqdm
import unicodedata

# Fix the global NumPy RNG so the pandas .sample(frac=1) shuffles below
# are reproducible across runs.
seed = 3541
np.random.seed(seed)

# Placeholder loss originally used to work around a deserialization error
# when loading the saved model (the load below uses compile=False instead).
def dummy_loss(y_true, y_pred):
    """Trivial stand-in loss: mean of (y_pred - y_true). Not a real objective."""
    difference = y_pred - y_true
    return tf.reduce_mean(difference)

# Load the CNN trained on Amazon reviews.
# compile=False skips deserializing the saved training configuration
# (loss/optimizer) — that deserialization is what raised the error this
# script works around — and the model is recompiled explicitly just below.
modelAmazon = keras.models.load_model(
    '/kaggle/input/pre-trained-model-binary-cnn-nlp-amazon-reviews/tensorflow1/pre_trained_sentiment_analysis_cnn_model_amazon_reviews/1/Binary_Classification_86_Amazon_Reviews_CNN.h5',
    compile=False
)

# Recompile for evaluation: binary cross-entropy averaged over the batch
# (SUM_OVER_BATCH_SIZE reduction) with accuracy as the reported metric.
modelAmazon.compile(
    optimizer='adam',
    loss=keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE),
    metrics=['accuracy']
)

# Loading Amazon test data
dataset_test_Amazon = pd.read_csv('/kaggle/input/amazon-reviews-for-sa-binary-negative-positive-csv/amazon_review_sa_binary_csv/test.csv')

# Loading Amazon train data (only used to fit the label encoder)
dataset_train_Amazon = pd.read_csv('/kaggle/input/amazon-reviews-for-sa-binary-negative-positive-csv/amazon_review_sa_binary_csv/train.csv')

# Shuffling both splits (np.random.seed above keeps this reproducible)
test_Amazon = dataset_test_Amazon.sample(frac=1)
train_Amazon = dataset_train_Amazon.sample(frac=1)

# Take a tiny portion of the train split for the label encoder.
# BUG FIX: slice the SHUFFLED frame (train_Amazon), not the raw
# dataset_train_Amazon — the original overwrote the shuffle and always
# took the first 100 unshuffled rows, which risks a single-class sample.
train_Amazon = train_Amazon.iloc[:100, :]

# Taking only the necessary columns
y_test_Amazon = test_Amazon['class_index'].values
X_train_Amazon = train_Amazon['review_text'].values
y_train_Amazon = train_Amazon['class_index'].values

# Preprocess corpus function
def pre_process_corpus(corpus):
    """Normalize raw review texts for the tokenizer.

    For each document: expand contractions, strip HTML tags, fold accented
    characters to plain ASCII, remove every non-letter/non-space character,
    lowercase, and trim surrounding whitespace.

    Args:
        corpus: iterable of raw text documents.

    Returns:
        list of cleaned, lowercased ASCII strings, in input order.
    """
    processed_corpus = []
    for doc in tqdm.tqdm(corpus):
        doc = contractions.fix(doc)
        doc = BeautifulSoup(doc, "html.parser").get_text()
        doc = unicodedata.normalize('NFKD', doc).encode('ascii', 'ignore').decode('utf-8', 'ignore')
        # BUG FIX: the original passed re.I|re.A (== 258) as re.sub's
        # positional `count` argument, silently capping the number of
        # replacements at 258 per document instead of setting flags.
        # Pass flags= explicitly so ALL non-letter characters are removed.
        doc = re.sub(r'[^a-zA-Z\s]', '', doc, flags=re.I | re.A)
        doc = doc.lower()
        doc = doc.strip()
        processed_corpus.append(doc)
    return processed_corpus

# Run both corpora through the same cleaning pipeline so the tokenizer
# (fit further below) sees text in the form the model was trained on.
X_test_Amazon = pre_process_corpus(test_Amazon['review_text'].values)
X_train_Amazon = pre_process_corpus(X_train_Amazon)

# Creating and Fitting the Tokenizer
etc ...

More info is available on the model's page on Kaggle:

https://www.kaggle.com/models/yacharki/pre-trained-model-binary-cnn-nlp-amazon-reviews