This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the wine lookup CSV from S3 and load it into a DataFrame.
lookup_object = client.get_object(
    Bucket='data-science-wine-reviews',
    Key='nearest_neighbors/data/wine_reviews_select_cols.csv',
)
wine_name_lookup = pd.read_csv(lookup_object['Body'])

# The second element of the endpoint result is treated as the collection of
# recommended row labels; cast each one to a plain int.
recommendation_indices = [int(n) for n in result[1]]
| recommendations = [] | |
| for i in recommendation_indices: | |
| suggested_wine = wine_name_lookup.at[i, 'Name'] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Send the serialized wine vector to the SageMaker endpoint as JSON.
endpoint_request = {
    'EndpointName': 'sagemaker-scikit-learn-2019-07-04-13-00-07-919',
    'ContentType': 'application/json',
    'Body': wine_vector_output,
}
response = runtime.invoke_endpoint(**endpoint_request)
def decode(s, encoding="ascii", errors="ignore"):
    """Return the bytes-like object ``s`` decoded to ``str``.

    With the default arguments, bytes that are not valid ASCII are dropped
    silently instead of raising ``UnicodeDecodeError``.
    """
    text = s.decode(encoding, errors)
    return text
# Read the raw response payload, decode it to text, then parse it as JSON.
raw_body = response['Body'].read()
result = json.loads(decode(raw_body))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Average the collected word vectors to obtain a single vector for the wine.
vector_total = sum(word_vectors)
wine_vector = vector_total / len(word_vectors)
# Convert to a plain list so the vector can be serialized as JSON.
wine_vector_output = json.dumps(wine_vector.tolist())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the word-vector table from S3 and index it by the 'word' column.
vectors_object = client.get_object(Bucket='data-science-wine-reviews', Key='word_vectors_idf.csv')
wine_df = pd.read_csv(vectors_object['Body']).set_index(['word'])

# Each 'word_vec_idf' cell holds a vector stored as text; strip the bracket
# and literal "\n" markers, then parse the space-separated numbers.
word_vectors = []
for word in payload:
    vector_text = wine_df.at[word, 'word_vec_idf']
    for marker in ('[', r'\n', ']'):
        vector_text = vector_text.replace(marker, '')
    word_vectors.append(np.fromstring(vector_text, dtype=float, sep=' '))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import boto3 | |
| import pandas as pd | |
| import numpy as np | |
| from six import BytesIO | |
| def lambda_handler(event, context): | |
| client = boto3.client('s3') | |
| runtime = boto3.client('runtime.sagemaker') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from sklearn.preprocessing import normalize | |
| # open the vectors.txt file containing all the trained word embeddings, extracting the descriptors & embeddings | |
# Count the newline-separated pieces of vectors.txt. The context manager
# closes the file handle promptly instead of leaving it open until it is
# garbage-collected.
with open('vectors.txt', 'r') as vectors_file:
    num_points = len(vectors_file.read().split('\n'))
| first_line = True | |
| index_to_word = [] | |
| with open("vectors.txt","r") as f: | |
| for line_num, line in enumerate(f): | |
| if first_line: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| s3 = boto3.resource('s3') | |
| key = bt_model.model_data[bt_model.model_data.find("/", 5)+1:] | |
| s3.Bucket(bucket).download_file(key, 'model.tar.gz') | |
| !tar -xvzf model.tar.gz |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rebind train_data from its previous value to an s3_input description built
# from it.
train_data = sagemaker.session.s3_input(
    train_data,
    distribution='FullyReplicated',
    content_type='text/plain',
    s3_data_type='S3Prefix',
)

# Fit the estimator with that input supplied under the 'train' channel.
data_channels = {'train': train_data}
bt_model.fit(inputs=data_channels, logs=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| sess = sagemaker.Session() | |
| # define the specifications of the sagemaker training instance | |
| bt_model = sagemaker.estimator.Estimator(container, | |
| role, | |
| train_instance_count=2, | |
| train_instance_type='ml.c4.2xlarge', | |
| train_volume_size = 5, | |
| train_max_run = 360000, | |
| input_mode= 'File', |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Look up the session's region and the BlazingText image URI for it.
region_name = boto3.Session().region_name
container = sagemaker.amazon.amazon_estimator.get_image_uri(region_name, "blazingtext", "latest")
print('Using SageMaker BlazingText container: {} ({})'.format(container, region_name))

# S3 locations for the training corpus and for the output, both in `bucket`.
train_data = 's3://{}/wine-corpus.txt'.format(bucket)
s3_output_location = 's3://{}/output'.format(bucket)
NewerOlder