
Cannot fetch the MNIST database from Anaconda / Jupyter

Hi everyone,

I'm new to Python / Anaconda / Jupyter / NumPy, pandas, and so on, so I apologize if this is a really silly question. I'm trying to fetch the MNIST database using Anaconda / Jupyter, but it always ends with an HTTP Error 500. Is it really a server problem (as the 500 suggests), or am I doing something wrong?

Input in Jupyter:

from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')

Result:

    ---------------------------------------------------------------------------
    HTTPError                                 Traceback (most recent call last)
    <ipython-input-1-15dc285fb373> in <module>()
          1 from sklearn.datasets import fetch_mldata
    ----> 2 mnist = fetch_mldata('MNIST original')

    e:\ProgramData\Anaconda3\lib\site-packages\sklearn\datasets\mldata.py in fetch_mldata(dataname, target_name, data_name, transpose_data, data_home)
        140         urlname = MLDATA_BASE_URL % quote(dataname)
        141         try:
    --> 142             mldata_url = urlopen(urlname)
        143         except HTTPError as e:
        144             if e.code == 404:

    e:\ProgramData\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
        221     else:
        222         opener = _opener
    --> 223     return opener.open(url, data, timeout)
        224 
        225 def install_opener(opener):

    e:\ProgramData\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
        530         for processor in self.process_response.get(protocol, []):
        531             meth = getattr(processor, meth_name)
    --> 532             response = meth(req, response)
        533 
        534         return response

    e:\ProgramData\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
        640         if not (200 <= code < 300):
        641             response = self.parent.error(
    --> 642                 'http', request, response, code, msg, hdrs)
        643 
        644         return response

    e:\ProgramData\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
        562             http_err = 0
        563         args = (dict, proto, meth_name) + args
    --> 564         result = self._call_chain(*args)
        565         if result:
        566             return result

    e:\ProgramData\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
        502         for handler in handlers:
        503             func = getattr(handler, meth_name)
    --> 504             result = func(*args)
        505             if result is not None:
        506                 return result

    e:\ProgramData\Anaconda3\lib\urllib\request.py in http_error_302(self, req, fp, code, msg, headers)
        754         fp.close()
        755 
    --> 756         return self.parent.open(new, timeout=req.timeout)
        757 
        758     http_error_301 = http_error_303 = http_error_307 = http_error_302

    e:\ProgramData\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
        530         for processor in self.process_response.get(protocol, []):
        531             meth = getattr(processor, meth_name)
    --> 532             response = meth(req, response)
        533 
        534         return response

    e:\ProgramData\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
        640         if not (200 <= code < 300):
        641             response = self.parent.error(
    --> 642                 'http', request, response, code, msg, hdrs)
        643 
        644         return response

    e:\ProgramData\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
        568         if http_err:
        569             args = (dict, 'default', 'http_error_default') + orig_args
    --> 570             return self._call_chain(*args)
        571 
        572 # XXX probably also want an abstract factory that knows when it makes

    e:\ProgramData\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
        502         for handler in handlers:
        503             func = getattr(handler, meth_name)
    --> 504             result = func(*args)
        505             if result is not None:
        506                 return result

    e:\ProgramData\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
        648 class HTTPDefaultErrorHandler(BaseHandler):
        649     def http_error_default(self, req, fp, code, msg, hdrs):
    --> 650         raise HTTPError(req.full_url, code, msg, hdrs, fp)
        651 
        652 class HTTPRedirectHandler(BaseHandler):

    HTTPError: HTTP Error 500: INTERNAL SERVER ERROR
12
SKMTH

I got the same error, and I had to turn off the firewall. On a MacBook, go to System Preferences > Security & Privacy > Firewall > Turn Off Firewall.

5
immarried
from sklearn.datasets import fetch_mldata
try:
    mnist = fetch_mldata('MNIST original')
except Exception as ex:        
    from six.moves import urllib
    from scipy.io import loadmat
    import os

    # Make sure the target directory exists before writing the file.
    os.makedirs(os.path.join(".", "datasets"), exist_ok=True)
    mnist_path = os.path.join(".", "datasets", "mnist-original.mat")

    # Download the dataset from GitHub instead of mldata.org.
    mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
    response = urllib.request.urlopen(mnist_alternative_url)
    with open(mnist_path, "wb") as f:
        content = response.read()
        f.write(content)

    mnist_raw = loadmat(mnist_path)
    mnist = {
        "data": mnist_raw["data"].T,
        "target": mnist_raw["label"][0],
        "COL_NAMES": ["label", "data"],
        "DESCR": "mldata.org dataset: mnist-original",
    }
    print("Done!")
4
Dean Wong

I get the same error as you. Here are a few possible solutions that do not rely on that server.

If you have TensorFlow installed, you can get the MNIST data like this:

import tensorflow.examples.tutorials.mnist.input_data as input_data
m = input_data.read_data_sets("MNIST")

For example, len(m.train.images) is 55000.
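
As a quick usage sketch (assuming the TensorFlow 1.x tutorials module above is available), the returned object exposes the usual train/validation/test splits:

# m.train, m.validation and m.test hold flattened 28x28 images.
print(m.train.images.shape)   # (55000, 784)
print(m.test.images.shape)    # (10000, 784)
print(m.train.labels[:10])    # integer class labels 0-9 (not one-hot by default)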

If you don't have TensorFlow, you can get this dataset using the instructions here.

3
Miriam Farber

Late to the party, but I had the same error, and my simple fix was to run the two commands separately, like this:

from sklearn import datasets

and then run this on a separate line in the Jupyter notebook:

mnist_data = datasets.fetch_mldata('MNIST original', data_home = 'datasets/')
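
If the download succeeds, fetch_mldata returns a Bunch-like object whose arrays you can check directly (a rough sketch, assuming the standard 70,000-sample MNIST layout):

X, y = mnist_data.data, mnist_data.target
print(X.shape)  # (70000, 784) - one flattened 28x28 image per row
print(y.shape)  # (70000,)     - one label per image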
2
noiivice

I found a good solution here: https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py

It downloads the dataset from Yann LeCun's website ( http://yann.lecun.com/exdb/mnist/ ).

import os
from urllib import urlretrieve  # Python 2; on Python 3 use urllib.request (see the answer below)
import numpy as np

def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
    print("Downloading %s" % filename)
    urlretrieve(source + filename, filename)

# We then define functions for loading MNIST images and labels.
# For convenience, they also download the requested files if needed.
import gzip

def load_mnist_images(filename):
    if not os.path.exists(filename):
        download(filename)
    # Read the inputs in Yann LeCun's binary format.
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    # The inputs are vectors now, we reshape them to monochrome 2D images,
    # following the shape convention: (examples, channels, rows, columns)
    data = data.reshape(-1, 1, 28, 28)
    # The inputs come as bytes, we convert them to float32 in range [0,1].
    # (Actually to range [0, 255/256], for compatibility to the version
    # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
    return data / np.float32(256)

def load_mnist_labels(filename):
    if not os.path.exists(filename):
        download(filename)
    # Read the labels in Yann LeCun's binary format.
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=8)
    # The labels are vectors of integers now, that's exactly what we want.
    return data


X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
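
As a rough check (a sketch based on the standard MNIST train/test split), the arrays loaded above should come out as:

print(X_train.shape, y_train.shape)  # (60000, 1, 28, 28) (60000,)
print(X_test.shape, y_test.shape)    # (10000, 1, 28, 28) (10000,)
print(X_train.dtype)                 # float32, scaled to [0, 255/256]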
1
Peter

Here is another place to download the MNIST dataset (referenced from https://github.com/ageron/handson-ml/blob/master/03_classification.ipynb ):

from six.moves import urllib
from sklearn.datasets import fetch_mldata
try:
    mnist = fetch_mldata('MNIST original')
except urllib.error.HTTPError as ex:
    print("Could not download MNIST data from mldata.org, trying alternative...")

    # Alternative method to load MNIST, if mldata.org is down
    from scipy.io import loadmat
    mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
    mnist_path = "./mnist-original.mat"
    response = urllib.request.urlopen(mnist_alternative_url)
    with open(mnist_path, "wb") as f:
        content = response.read()
        f.write(content)
    mnist_raw = loadmat(mnist_path)
    mnist = {
        "data": mnist_raw["data"].T,
        "target": mnist_raw["label"][0],
        "COL_NAMES": ["label", "data"],
        "DESCR": "mldata.org dataset: mnist-original",
    }
    print("Success!")
1
Dave McCormick

I found this solution at https://github.com/ageron/handson-ml/issues/7 , and it was the most useful for me. Just download the file from https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat

Then use this script:

from scipy.io import loadmat
mnist_path = "my/local/path/mnist-original.mat" #type the directory where you want to the file is located
mnist_raw = loadmat(mnist_path)
mnist = {
    "data": mnist_raw["data"].T,
    "target": mnist_raw["label"][0],
    "COL_NAMES": ["label", "data"],
    "DESCR": "mldata.org dataset: mnist-original",
}
print("Success!")
0

This is for Python 3.6.*:

import os
from urllib.request import urlretrieve
import numpy as np

def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
    print("Downloading %s" % filename)
    urlretrieve(source + filename, filename)

# We then define functions for loading MNIST images and labels.
# For convenience, they also download the requested files if needed.
import gzip

def load_mnist_images(filename):
    if not os.path.exists(filename):
        download(filename)
    # Read the inputs in Yann LeCun's binary format.
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    # The inputs are vectors now, we reshape them to monochrome 2D images,
    # following the shape convention: (examples, channels, rows, columns)
    data = data.reshape(-1, 1, 28, 28)
    # The inputs come as bytes, we convert them to float32 in range [0,1].
    # (Actually to range [0, 255/256], for compatibility to the version
    # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
    return data / np.float32(256)

def load_mnist_labels(filename):
    if not os.path.exists(filename):
        download(filename)
    # Read the labels in Yann LeCun's binary format.
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=8)
    # The labels are vectors of integers now, that's exactly what we want.
    return data


X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
0
sliawatimena