How to open and convert sqlite database to pandas dataframe

Python Programming

Question or problem about Python programming:

I have downloaded some datas as a sqlite database (data.db) and I want to open this database in python and then convert it into pandas dataframe.

This is so far I have done

import sqlite3
import pandas    
dat = sqlite3.connect('data.db') #connected to database with out error
pandas.DataFrame.from_records(dat, index=None, exclude=None, columns=None, coerce_float=False, nrows=None)

But its throwing this error

Traceback (most recent call last):
  File "", line 1, in 
  File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line 980, in from_records
    coerce_float=coerce_float)
  File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line 5353, in _to_arrays
    if not len(data):
TypeError: object of type 'sqlite3.Connection' has no len()

How to convert sqlite database to pandas dataframe

How to solve the problem:

Solution 1:

Despite sqlite being part of the Python Standard Library and is a nice and easy interface to SQLite databases, the Pandas tutorial states:


Note In order to use read_sql_table(), you must have the SQLAlchemy
optional dependency installed.

But Pandas still supports sqlite3 access if you want to avoid installing SQLAlchemy:

import sqlite3 import pandas as pd # Create your connection. cnx = sqlite3.connect('file.db') df = pd.read_sql_query("SELECT * FROM table_name", cnx) 

As stated here, but you need to know the name of the used table in advance.

Solution 2:

The line

data = sqlite3.connect('data.db') 

opens a connection to the database. There are no records queried up to this. So you have to execute a query afterward and provide this to the pandas DataFrame constructor.

It should look similar to this

import sqlite3 import pandas as pd dat = sqlite3.connect('data.db') query = dat.execute("SELECT * From ") cols = [column[0] for column in query.description] results= pd.DataFrame.from_records(data = query.fetchall(), columns = cols) 

I am not really firm with SQL commands, so you should check the correctness of the query. should be the name of the table in your database.

Solution 3:

Search sqlalchemy, engine and database name in google (sqlite in this case):

import pandas as pd import sqlalchemy db_name = "data.db" table_name = "LITTLE_BOBBY_TABLES" engine = sqlalchemy.create_engine("sqlite:///%s" % db_name, execution_options={"sqlite_raw_colnames": True}) df = pd.read_sql_table(table_name, engine) 

Solution 4:

I wrote a piece of code up that saves tables in a database file such as .sqlite or .db and creates an excel file out of it with each table as a sheet or makes individual tables into csvs.

Note: You don’t need to know the table names in advance!

import os, fnmatch import sqlite3 import pandas as pd #creates a directory without throwing an error def create_dir(dir): if not os.path.exists(dir): os.makedirs(dir) print("Created Directory : ", dir) else: print("Directory already existed : ", dir) return dir #finds files in a directory corresponding to a regex query def find(pattern, path): result = [] for root, dirs, files in os.walk(path): for name in files: if fnmatch.fnmatch(name, pattern): result.append(os.path.join(root, name)) return result #convert sqlite databases(.db,.sqlite) to pandas dataframe(excel with each table as a different sheet or individual csv sheets) def save_db(dbpath=None,excel_path=None,csv_path=None,extension="*.sqlite",csvs=True,excels=True): if (excels==False and csvs==False): print("Atleast one of the parameters need to be true: csvs or excels") return -1 #little code to find files by extension if dbpath==None: files=find(extension,os.getcwd()) if len(files)>1: print("Multiple files found! Selecting the first one found!") print("To locate your file, set dbpath=") dbpath = find(extension,os.getcwd())[0] if dbpath==None else dbpath print("Reading database file from location :",dbpath) #path handling external_folder,base_name=os.path.split(os.path.abspath(dbpath)) file_name=os.path.splitext(base_name)[0] #firstname without . exten=os.path.splitext(base_name)[-1] #.file_extension internal_folder="Saved_Dataframes_"+file_name main_path=os.path.join(external_folder,internal_folder) create_dir(main_path) excel_path=os.path.join(main_path,"Excel_Multiple_Sheets.xlsx") if excel_path==None else excel_path csv_path=main_path if csv_path==None else csv_path db = sqlite3.connect(dbpath) cursor = db.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") tables = cursor.fetchall() print(len(tables),"Tables found :") if excels==True: #for writing to excel(xlsx) we will be needing this! try: import XlsxWriter except ModuleNotFoundError: !pip install XlsxWriter if (excels==True and csvs==True): writer = pd.ExcelWriter(excel_path, engine='xlsxwriter') i=0 for table_name in tables: table_name = table_name[0] table = pd.read_sql_query("SELECT * from %s" % table_name, db) i+=1 print("Parsing Excel Sheet ",i," : ",table_name) table.to_excel(writer, sheet_name=table_name, index=False) print("Parsing CSV File ",i," : ",table_name) table.to_csv(os.path.join(csv_path,table_name + '.csv'), index_label='index') writer.save() elif excels==True: writer = pd.ExcelWriter(excel_path, engine='xlsxwriter') i=0 for table_name in tables: table_name = table_name[0] table = pd.read_sql_query("SELECT * from %s" % table_name, db) i+=1 print("Parsing Excel Sheet ",i," : ",table_name) table.to_excel(writer, sheet_name=table_name, index=False) writer.save() elif csvs==True: i=0 for table_name in tables: table_name = table_name[0] table = pd.read_sql_query("SELECT * from %s" % table_name, db) i+=1 print("Parsing CSV File ",i," : ",table_name) table.to_csv(os.path.join(csv_path,table_name + '.csv'), index_label='index') cursor.close() db.close() return 0 save_db(); 

Solution 5:

i have stored my data in database.sqlite table name is Reviews

import sqlite3 con=sqlite3.connect("database.sqlite") data=pd.read_sql_query("SELECT * FROM Reviews",con) print(data) 

Hope this helps!