none
Visual Basic Json to CSV Conversion - problem converting large json files to csv

    Question

  • I have large json files(50+mbs) that I need to convert to .csv. I also have a json-to-csv converter coded in Visual Basic but because the number of rows in the csv file is limited to 1,048,576 rows I'm unable to convert everything successfully onto one sheet.

    Can I add some code to the converter to add extra .csv files when it gets to a certain limit? This is the code for the json-to-csv program

    lass Form1
    
        Private marketDictionary As New Dictionary(Of String, String)
        Private runnerDictionary As New Dictionary(Of Integer, String)
    
        Public Sub Print(ByVal Message As String)
            TextBox1.SelectionStart = TextBox1.Text.Length
            TextBox1.SelectedText = vbCrLf & Message
        End Sub
    
        Private Sub OpenToolStripMenuItem_Click(sender As Object, e As EventArgs) Handles OpenToolStripMenuItem.Click
            With OpenFileDialog1
                .Title = "Open File ..."
                .InitialDirectory = "C:\Users\Anto\Desktop\Application Files\Betfair_1_0_0_1\"
                .FileName = "*.json"
                .ShowDialog()
            End With
    
        End Sub
    
        Private Sub ExitToolStripMenuItem_Click(sender As Object, e As EventArgs) Handles ExitToolStripMenuItem.Click
    
            Me.Close()
    
        End Sub
    
        Private Sub ProcessToolStripMenuItem_Click(sender As Object, e As EventArgs) Handles ProcessToolStripMenuItem.Click
    
            Print("Processing JSON file")
    
            ProcessJSON(OpenFileDialog1.FileName.ToString())
    
            Print("Processing complete")
    
        End Sub
    
        Private Sub OpenFileDialog1_FileOk(sender As Object, e As System.ComponentModel.CancelEventArgs) Handles OpenFileDialog1.FileOk
    
            Dim jsonFilename As String
    
            jsonFilename = OpenFileDialog1.FileName.ToString()
    
            Dim dateString As String = jsonFilename.Replace("C:\Users\Anto\Desktop\Application Files\Betfair_1_0_0_1\", "").Replace(".json", "")
    
            Dim marketKeys As String = "C:\Users\Anto\Desktop\Application Files\Betfair_1_0_0_1\marketKeys-" & dateString & ".csv"
            Dim runnerKeys As String = "C:\Users\Anto\Desktop\Application Files\Betfair_1_0_0_1\runnerKeys-" & dateString & ".csv"
    
    
            Print(jsonFilename)
            Print(marketKeys)
            Print(runnerKeys)
            Print("")
            Print("Loading market and runner keys")
    
            LoadKeys(marketKeys, runnerKeys)
    
            Print("Keys loaded - System ready for processing")
            Print("")
    
        End Sub
    
        Private Sub LoadKeys(ByVal marketKeysFilename As String, ByVal runnerKeysFilename As String)
    
            Dim line As String
    
            Using reader As StreamReader = New StreamReader(marketKeysFilename)
    
                line = reader.ReadLine
    
                Do While (Not line Is Nothing)
    
                    Dim parts As String() = Strings.Split(line, ",")
                    Try
                        marketDictionary.Add(parts(0), parts(1))
                    Catch ex As Exception
                        Print("Ignoring duplicate market key")
                    End Try
    
                    line = reader.ReadLine
    
                Loop
    
            End Using
    
            Using reader As StreamReader = New StreamReader(runnerKeysFilename)
    
                line = reader.ReadLine
    
                Do While (Not line Is Nothing)
    
                    Dim parts As String() = Strings.Split(line, ",")
    
                    Try
                        runnerDictionary.Add(parts(0), parts(1))
                    Catch ex As Exception
                        Print("Ignoring duplicate runner key")
                    End Try
    
                    line = reader.ReadLine
    
                Loop
    
            End Using
    
        End Sub
    
        Private Sub ProcessJSON(ByVal jsonFilename As String)
    
            Dim outputFilename As String = jsonFilename.Replace("json", "csv")
    
            Dim line As String
    
            Using reader As StreamReader = New StreamReader(jsonFilename)
    
                line = reader.ReadLine
    
                Do While (Not line Is Nothing)
    
                    Dim parts As String() = Strings.Split(line, "*")
                    Dim book() As MarketBookResponse = DeserializeRawBook(parts(1))
                    For bookCount As Integer = 0 To book(0).result.Count - 1
                        For runnerCount As Integer = 0 To book(0).result(bookCount).runners.Count - 1
    
                            With book(0).result(bookCount).runners(runnerCount)
    
                                Using writer As StreamWriter = File.AppendText(outputFilename)
    
                                    writer.WriteLine(parts(0) & "," & marketDictionary.Item(book(0).result(bookCount).marketId) & "," & runnerDictionary.Item(.selectionId) & "," & .lastPriceTraded)
    
                                End Using
    
                            End With
    
                        Next
    
                    Next
    
                    line = reader.ReadLine 'read in the next line.
    
                Loop
    
            End Using
    
        End Sub
    
    End Class

    By the way I have very little coding experience, this is coded straight from a book I bought.

    Any help would be greatly appreciated, thanks

    Friday, April 14, 2017 12:53 AM

All replies

  • I also have a json-to-csv converter coded in Visual Basic but because the number of rows in the csv file is limited to 1,048,576 rows I'm unable to convert everything successfully onto one sheet.

    What is a 'sheet'?   Do you mean that your file has to comply with some limit imposed by some other processing?

    In that case you need to count the lines as you create them.  That code seems very inefficient, but is actually easy to modify.  You will need to manufacture a new filename for each portion, however.  Something like:

            Dim outputFileBase As String = jsonFilename.Replace(".json", "")
            Dim line As String
            Dim LineCount As Integer = 0
            Dim FileCount As Integer = 0
            Dim OutputFileName As String = Path.Combine(outputFileBase & FileCount.ToString, ".csv")
    
            Using reader As StreamReader = New StreamReader(jsonFilename)
                line = reader.ReadLine
                Do While (Not line Is Nothing)
                    Dim parts As String() = Strings.Split(line, "*")
                    Dim book() As MarketBookResponse = DeserializeRawBook(parts(1))
                    For bookCount As Integer = 0 To book(0).result.Count - 1
                        For runnerCount As Integer = 0 To book(0).result(bookCount).runners.Count - 1
                            With book(0).result(bookCount).runners(runnerCount)
                                Using writer As StreamWriter = File.AppendText(OutputFileName)
                                    writer.WriteLine(parts(0) & "," & marketDictionary.Item(book(0).result(bookCount).marketId) & "," & runnerDictionary.Item(.selectionId) & "," & .lastPriceTraded)
                                    LineCount += 1
                                    If LineCount > 1000000 Then
                                        FileCount += 1
                                        OutputFileName = Path.Combine(outputFileBase & FileCount.ToString, ".csv")
                                    End If
                                End Using
                            End With
                        Next
                    Next
                    line = reader.ReadLine 'read in the next line.
                Loop
            End Using
    

    Friday, April 14, 2017 1:59 AM
  • Bill,

    Unless you want to take it all the way back to the beginning, what Acamar suggested is probably the best way to go.

    If you do want to take it all the way back then I've got to ask: Why would you want a .csv file that large? If you'd care to explain what the data is about then there might be a better remedy.


    "A problem well stated is a problem half solved.” - Charles F. Kettering

    Friday, April 14, 2017 10:00 AM
  • Hi Acamar and thanks for your reply.

    Firstly when I wrote 'sheet' I meant the csv file. The csv file is limited to 1,048,576 rows.

    And yes you are right the code is not as efficient as it could be and the author of the book acknowledges that. His intentions were to show readers a broad view of how to build betfair applications using vb.

    I ran the revised code and I got this error:

    An unhandled exception of type 'System.IO.DirectoryNotFoundException' occurred in mscorlib.dll

    Additional information: Could not find a part of the path 'C:\Users\Anto\Desktop\Application Files\Betfair_1_0_0_1\2017-04-050\.csv'.

    Friday, April 14, 2017 11:55 PM
  • Additional information: Could not find a part of the path 'C:\Users\Anto\Desktop\Application Files\Betfair_1_0_0_1\2017-04-050\.csv'.

    What line of code did the error occur at?  If it was
        Using writer As StreamWriter = File.AppendText(OutputFileName)

    then that implies that
        Dim OutputFileName As String = Path.Combine(outputFileBase & FileCount.ToString, ".csv")

    did not work, and that in turn implies that 
        Dim outputFileBase As String = jsonFilename.Replace(".json", "")did not work.  That might be because you put that initialisation code before whatever code initialises jsonFilename. You need to insert breakpoints from the first usage of jsonFilename and step through the code one line at a time to find out why 'outputFileBase & FileCount.ToString' might be blank, even though there appears to be a directory name for the output file.

    Saturday, April 15, 2017 12:37 AM
  • Hi Frank, thanks for your reply.

    The large json files I have are Betfair horse racing market prices gathered during a given day's racing using Betfair's API and a Visual Basic program.

    The vb program pulls info from the API and sends it to my desktop. I then activate the json2csv converter to convert them to csv.

    I hope to reduce the size of the json files in the future as some of the information, for me, is unnecessary and as Acamar said, the code is inefficient.

    Saturday, April 15, 2017 12:38 AM
  • Hi Acamar, yes it broke there - Using writer As StreamWriter = File.AppendText(OutputFileName)

    I replaced this

     Private Sub ProcessJSON(ByVal jsonFilename As String)
    
            Dim outputFilename As String = jsonFilename.Replace("json", "csv")
    
            Dim line As String
    
            Using reader As StreamReader = New StreamReader(jsonFilename)
    
                line = reader.ReadLine
    
                Do While (Not line Is Nothing)
    
                    Dim parts As String() = Strings.Split(line, "*")
                    Dim book() As MarketBookResponse = DeserializeRawBook(parts(1))
                    For bookCount As Integer = 0 To book(0).result.Count - 1
                        For runnerCount As Integer = 0 To book(0).result(bookCount).runners.Count - 1
    
                            With book(0).result(bookCount).runners(runnerCount)
    
                                Using writer As StreamWriter = File.AppendText(outputFilename)
    
                                    writer.WriteLine(parts(0) & "," & marketDictionary.Item(book(0).result(bookCount).marketId) & "," & runnerDictionary.Item(.selectionId) & "," & .lastPriceTraded)
    
                                End Using
    
                            End With
    
                        Next
    
                    Next
    
                    line = reader.ReadLine 'read in the next line.
    
                Loop
    
            End Using
    
        End Sub

    with this

     Private Sub ProcessJSON(ByVal jsonFilename As String)
    
            Dim outputFileBase As String = jsonFilename.Replace(".json", "")
    
            Dim line As String
    
            Dim LineCount As Integer = 0
    
            Dim FileCount As Integer = 0
    
            Dim OutputFileName As String = Path.Combine(outputFileBase & FileCount.ToString, ".csv")
    
            Using reader As StreamReader = New StreamReader(jsonFilename)
    
                line = reader.ReadLine
    
                Do While (Not line Is Nothing)
    
                    Dim parts As String() = Strings.Split(line, "*")
    
                    Dim book() As MarketBookResponse = DeserializeRawBook(parts(1))
    
                    For bookCount As Integer = 0 To book(0).result.Count - 1
    
                        For runnerCount As Integer = 0 To book(0).result(bookCount).runners.Count - 1
    
                            With book(0).result(bookCount).runners(runnerCount)
    
                                Using writer As StreamWriter = File.AppendText(OutputFileName)
                                    writer.WriteLine(parts(0) & "," & marketDictionary.Item(book(0).result(bookCount).marketId) & "," & runnerDictionary.Item(.selectionId) & "," & .lastPriceTraded)
                                    LineCount += 1
    
                                    If LineCount > 1000000 Then
                                        FileCount += 1
                                        OutputFileName = Path.Combine(outputFileBase & FileCount.ToString, ".csv")
    
                                    End If
    
                                End Using
    
                            End With
    
                        Next
                    Next
                    line = reader.ReadLine 'read in the next line.
                Loop
            End Using
    
        End Sub


    Saturday, April 15, 2017 1:06 AM
  • Hi Acamar, yes it broke there - Using writer As StreamWriter = File.AppendText(OutputFileName)

    What was the result when you inserted a breakpoint at the first line of the method and stepped it through a line at a time watching jsonFilename and OutputFileName?

    Saturday, April 15, 2017 1:24 AM
  • Hi Frank, thanks for your reply.

    The large json files I have are Betfair horse racing market prices gathered during a given day's racing using Betfair's API and a Visual Basic program.

    The vb program pulls info from the API and sends it to my desktop. I then activate the json2csv converter to convert them to csv.

    I hope to reduce the size of the json files in the future as some of the information, for me, is unnecessary and as Acamar said, the code is inefficient.

    I don't know the first thing about that, but I'm guessing that you don't need the CSV at all.

    As a suggestion, deserialize the JSON into one or more instances of class(es) and then consume it directly.


    "A problem well stated is a problem half solved.” - Charles F. Kettering

    Saturday, April 15, 2017 1:34 AM
  • I don't know the first thing about that, but I'm guessing that you don't need the CSV at all.

    I suspect the CSV is for Excel, which is the cause of the size limit.

    Saturday, April 15, 2017 1:39 AM
  • yes and this seems to be a very cumbersome way of doing things. I think I need a more efficient way of getting the info into json in the first place.

    Saturday, April 15, 2017 1:45 AM
  • Yes I need to find a better solution.Thanks for your time Frank, I really appreciate your advice
    Saturday, April 15, 2017 1:56 AM
  • Thanks for your time Acamar, I really appreciate your efforts.
    Saturday, April 15, 2017 1:58 AM
  • Hi Billjoy,

    I find some code that you can refer to.

    Imports System.Collections.Generic
    Imports System.ComponentModel
    Imports System.Data
    Imports System.Drawing
    Imports System.Linq
    Imports System.Text
    Imports System.Windows.Forms
    Imports Newtonsoft.Json
    Imports System.IO
    
    Public Class Form7
        Private Sub Form7_Load(sender As Object, e As EventArgs) Handles MyBase.Load
    
        End Sub
        Public Class AlteredUsers
            Public UserID As Integer
            Public userName As String
        End Class
        Public Class AttributeDetails
            Public Abbreviation As String
            Public DataType As String
            Public DefaultOperator As String
            Public DefaultStatus As Integer
            Public DefaultStatusSpecified As Boolean
            Public DisplayName As String
            Public EffectiveStatus As Integer
            Public EffectiveStatusSpecified As Boolean
            Public HelpText As String
            Public ID As Integer
            Public IDSpecified As Boolean
            Public IsQualifier As Boolean
    
        End Class
        Public Class MyJSONClass
            Public AlertID As Integer
            Public AlertIDSpecified As Boolean
            Public AlertSentTimeStamp As String
            Public AlertSentTimeStampSpecified As Boolean
            Public Altereduser As List(Of AlteredUsers)
            Public Attributedetails As AttributeDetails
    
        End Class
    
        Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
            Dim objMyJSONClass As New MyJSONClass()
            objMyJSONClass.Altereduser = New List(Of AlteredUsers)()
            objMyJSONClass = ConvertJSONToObject(textBox1.Text)
            If objMyJSONClass IsNot Nothing Then
                Dim saveFileDialog1 As New SaveFileDialog()
                saveFileDialog1.Filter = "CSV File|*.csv"
                saveFileDialog1.Title = "Save CSV File"
                If saveFileDialog1.ShowDialog() = DialogResult.OK Then
                    SaveToCSV(saveFileDialog1.FileName, objMyJSONClass)
                End If
            End If
    
        End Sub
        Private Sub SaveToCSV(csvfileName As String, objMyJSONClass As MyJSONClass)
            Dim csvString As String = (Convert.ToString(objMyJSONClass.AlertID + "," + objMyJSONClass.AlertIDSpecified + ",") & objMyJSONClass.AlertSentTimeStamp) + "," + objMyJSONClass.AlertSentTimeStampSpecified
    
            If objMyJSONClass.Altereduser IsNot Nothing Then
                For Each user As AlteredUsers In objMyJSONClass.Altereduser
                    csvString = Convert.ToString("," + user.UserID.ToString() + ",") & user.userName
                Next
            End If
    
            csvString = (Convert.ToString((Convert.ToString((Convert.ToString((Convert.ToString((Convert.ToString(csvString & Convert.ToString(",")) & objMyJSONClass.Attributedetails.Abbreviation) + ",") & objMyJSONClass.Attributedetails.DataType) + ",") & objMyJSONClass.Attributedetails.DefaultOperator) + "," + objMyJSONClass.Attributedetails.DefaultStatus.ToString() + "," + objMyJSONClass.Attributedetails.DefaultStatusSpecified.ToString() + ",") & objMyJSONClass.Attributedetails.DisplayName) + "," + objMyJSONClass.Attributedetails.EffectiveStatus.ToString() + "," + objMyJSONClass.Attributedetails.EffectiveStatusSpecified.ToString() + ",") & objMyJSONClass.Attributedetails.HelpText) + "," + objMyJSONClass.Attributedetails.ID.ToString() + "," + objMyJSONClass.Attributedetails.IDSpecified.ToString() + "," + objMyJSONClass.Attributedetails.IsQualifier.ToString()
    
    
            File.WriteAllText(csvfileName, csvString)
        End Sub
    
        Private Function ConvertJSONToObject(InputJSONString As String) As MyJSONClass
            Dim objMyJSONClass As New MyJSONClass()
            objMyJSONClass.Altereduser = New List(Of AlteredUsers)()
    
            Try
                objMyJSONClass = DirectCast(JsonConvert.DeserializeObject(InputJSONString, objMyJSONClass.[GetType]()), MyJSONClass)
    
                Return objMyJSONClass
            Catch ex As Exception
    
                Return Nothing
            End Try
    
        End Function
    
    End Class

    Best Regards,

    Cherry Bu


    MSDN Community Support
    Please remember to click "Mark as Answer" the responses that resolved your issue, and to click "Unmark as Answer" if not. This can be beneficial to other community members reading this thread. If you have any compliments or complaints to MSDN Support, feel free to contact MSDNFSF@microsoft.com.

    Thursday, May 04, 2017 8:54 AM
    Moderator