Error 0063: cannot open the connection

  • Question

  • Hi AK,

     When I try to run my R code in Azure ML, it throws Error 0063: cannot open the connection. This is my code:

    install.packages("src/lda_packages/plyr_1.8.3.zip", lib = ".", repos = NULL, verbose = TRUE)

    (success.plyr <- library("plyr", lib.loc = ".", logical.return = TRUE, verbose = TRUE))

    install.packages("src/lda_packages/NLP_0.1-8.zip", lib = ".", repos = NULL, verbose = TRUE)

    (success.nlp <- library("NLP", lib.loc = ".", logical.return = TRUE, verbose = TRUE))

    install.packages("src/lda_packages/tm_0.6-2.zip", lib = ".", repos = NULL, verbose = TRUE)

    (success.tm <- library("tm", lib.loc = ".", logical.return = TRUE, verbose = TRUE))

    #Install actual package

    install.packages("src/lda_packages/lda_1.3.2.zip", lib = ".", repos = NULL, verbose = TRUE)

    (success <- library("lda", lib.loc = ".", logical.return = TRUE, verbose = TRUE))



    library(lda, lib.loc = ".")



    documents.file <- "documents.csv"
    num.topics <- 10


    documents <- read.csv(documents.file, stringsAsFactors=FALSE)


    corpus <- Corpus(VectorSource(documents$contents))
    corpus <- tm_map(corpus, removePunctuation)
    corpus <- tm_map(corpus, removeNumbers)
    corpus <- tm_map(corpus, removeWords, stopwords("english"))
    corpus <- tm_map(corpus, stripWhitespace)
    corpus <- tm_map(corpus, stemDocument, language="english", mc.cores=1)


    lines <- sapply(1:length(corpus), function(i) corpus[[i]]$content)

    docs <- lexicalize(lines)


    result <- lda.collapsed.gibbs.sampler(docs$documents, num.topics, docs$vocab, 25, 0.1, 0.1, compute.log.likelihood=TRUE) # 25 Gibbs sweeps, alpha = eta = 0.1
    top.words <- top.topic.words(result$topics, 10, by.score=TRUE)

    cat("Top words for each topic")
    print(top.words)


    top.documents <- top.topic.documents(result$document_sums, num.documents = 10)
    top.documents.contents <- t(aaply(top.documents, 2, function(col) documents$contents[col]))

    cat("Top documents for each topic")
    print(top.documents)

    Please kindly help me resolve this issue. I went through a few articles related to this error, but I didn't find a solution for my case.

    Thanks & Regards,

    Shalini

    Tuesday, September 29, 2015 11:46 AM

Answers

  • Hi Shalini,

    Thanks for the sample lines of text. I created a CSV file with 2 rows of data.

    This file was uploaded into Azure ML as a dataset and connected to the 'Execute R Script' module in the updated experiment.

    The sample script is as follows:

    documents <- maml.mapInputPort(1) # class: data.frame
     
    
    install.packages("src/foo/lda.zip", lib = ".", repos = NULL, verbose = TRUE)
    success <- library("lda", lib.loc = ".", logical.return = TRUE, verbose = TRUE)
    library(lda)
    library(plyr)
    library(tm)
    library(NLP)
    
    num.topics <- 10
     
    corpus <- Corpus(VectorSource(documents$contents))
    corpus <- tm_map(corpus, removePunctuation)
    corpus <- tm_map(corpus, removeNumbers)
    corpus <- tm_map(corpus, removeWords, stopwords("english"))
    corpus <- tm_map(corpus, stripWhitespace)
    corpus <- tm_map(corpus, stemDocument, language="english", mc.cores=1)
    
    lines <- sapply(1:length(corpus), function(i) corpus[[i]]$content)
    
    docs <- lexicalize(lines)
    result <- lda.collapsed.gibbs.sampler(docs$documents, num.topics, docs$vocab, 25, 0.1, 0.1, compute.log.likelihood=TRUE)
    top.words <- top.topic.words(result$topics, 10, by.score=TRUE)
    
    cat("Top words for each topic")
    print(top.words)
    top.documents <- top.topic.documents(result$document_sums, num.documents = 10)
    top.documents.contents <- t(aaply(top.documents, 2, function(col) documents$contents[col]))
    cat("Top documents for each topic")
    print(top.documents)
    data <- as.data.frame(documents)
    
    # Select data.frame to be sent to the output Dataset port
    maml.mapOutputPort("data");

    The output log is as follows:

    [ModuleOutput] Top words for each topic      [,1]        [,2]       [,3]       [,4]        [,5]        [,6]
    [ModuleOutput]  [1,] "michael"   "document" "collect"  "data"      "observ"    "cat"
    [ModuleOutput]  [2,] "meow"      "topic"    "languag"  "similar"   "exampl"    "particular"

    It seems the data was not being read previously. Could you check whether this works?
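
    If the error persists, a quick sanity check like the one below (a minimal sketch; it assumes the dataset is wired to the module's first input port and has a 'contents' column, as in your script) will confirm whether the data frame is arriving:

    # Sanity check inside 'Execute R Script': inspect what arrives on input port 1
    documents <- maml.mapInputPort(1)
    str(documents)                      # should show a 'contents' column
    cat("rows:", nrow(documents), "\n") # row count of the incoming data frame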

    Regards,
    Jaya.



    Wednesday, September 30, 2015 10:52 AM

All replies

  • Hi Shalini,

    Could you provide a couple of lines of text from your file 'documents.csv'?

    Azure ML already has the packages 'plyr', 'NLP', and 'tm' installed, so you only need to download the 'lda' package from the CRAN repository and import it as a zip bundle into the 'Execute R Script' module.
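
    A minimal sketch (run inside the 'Execute R Script' module) to confirm which of the required packages are already available:

    # Check which required packages are preinstalled in the Azure ML R environment
    required <- c("plyr", "NLP", "tm", "lda")
    available <- required %in% rownames(installed.packages())
    print(setNames(available, required))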

     

    Most of your script seems to work within the 'Execute R Script' module:

    install.packages("src/foo/lda.zip", lib = ".", repos = NULL, verbose = TRUE)
    success <- library("lda", lib.loc = ".", logical.return = TRUE, verbose = TRUE)
    library(lda)
    library(plyr)
    library(tm)
    library(NLP)
    num.topics <- 10
    documents <-"Test input" 
    corpus <- Corpus(VectorSource(documents))
    corpus <- tm_map(corpus, removePunctuation)
    corpus <- tm_map(corpus, removeNumbers)
    corpus <- tm_map(corpus, removeWords, stopwords("english"))
    corpus <- tm_map(corpus, stripWhitespace)
    corpus <- tm_map(corpus, stemDocument, language="english", mc.cores=1)
    lines <- sapply(1:length(corpus), function(i) corpus[[i]]$content)
     
    docs <- lexicalize(lines)
    result <- lda.collapsed.gibbs.sampler(docs$documents, num.topics, docs$vocab, 25, 0.1, 0.1, compute.log.likelihood=TRUE)
    top.words <- top.topic.words(result$topics, 10, by.score=TRUE)
    cat("Top words for each topic")
    print(top.words)
    #top.documents <- top.topic.documents(result$document_sums, num.documents = 10)
    #top.documents.contents <- t(aaply(top.documents, 2, function(col) documents$contents[col]))
    
    #cat("Top documents for each topic")
    #print(top.documents)
     
    data <- as.data.frame(documents)
     
    # Select data.frame to be sent to the output Dataset port
    maml.mapOutputPort("data");
    

    Thanks,
    Jaya.


    Tuesday, September 29, 2015 1:38 PM
  • Hi Jaya,

    Sample lines from the file 'documents.csv':

    In natural language processing, latent Dirichlet allocation (LDA) is a generative model that allows sets of observations to be explained by unobserved groups that explain why some parts of the data are similar. For example, if observations are words collected into documents, it posits that each document is a mixture of a small number of topics and that each word's creation is attributable to one of the document's topics. LDA is an example of a topic model and was first presented as a graphical model for topic discovery by David Blei, Andrew Ng, and Michael Jordan in 2003.
    In machine learning and natural language processing, a topic model is a type of statistical model for discovering the abstract "topics" that occur in a collection of documents. Intuitively, given that a document is about a particular topic, one would expect particular words to appear in the document more or less frequently: "dog" and "bone" will appear more often in documents about dogs, "cat" and "meow" will appear in documents about cats, and "the" and "is" will appear equally in both. A document typically concerns multiple topics in different proportions; thus, in a document that is 10% about cats and 90% about dogs, there would probably be about 9 times more dog words than cat words. A topic model captures this intuition in a mathematical framework, which allows examining a set of documents and discovering, based on the statistics of the words in each, what the topics might be and what each document's balance of topics is. Although topic models were first described and implemented in the context of natural language processing, they have applications in other fields such as bioinformatics.

    The document consists of paragraphs; the above is a sample.

    Jaya, now we are getting Error 0063 with the message "file should be a character string or connection".
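
    For context, R raises that message when read.csv is handed something other than a file path or connection; a minimal sketch (hypothetical, not taken from the experiment) that reproduces it:

    # Hypothetical reproduction: passing a data frame where a file path is expected
    df <- data.frame(contents = "some text")
    read.csv(df)  # Error in read.table(...): 'file' must be a character string or connection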

    Thanks and Regards,

    Shalini 

    Wednesday, September 30, 2015 6:13 AM