Question or problem about Python programming:
I have downloaded some datas as a sqlite database (data.db) and I want to open this database in python and then convert it into pandas dataframe.
This is so far I have done
import sqlite3 import pandas dat = sqlite3.connect('data.db') #connected to database with out error pandas.DataFrame.from_records(dat, index=None, exclude=None, columns=None, coerce_float=False, nrows=None)
But its throwing this error
Traceback (most recent call last): File "", line 1, in File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line 980, in from_records coerce_float=coerce_float) File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line 5353, in _to_arrays if not len(data): TypeError: object of type 'sqlite3.Connection' has no len()
How to convert sqlite database to pandas dataframe
How to solve the problem:
Solution 1:
Despite sqlite being part of the Python Standard Library and is a nice and easy interface to SQLite databases, the Pandas tutorial states:
Note In order to use read_sql_table(), you must have the SQLAlchemy
optional dependency installed.
But Pandas still supports sqlite3 access if you want to avoid installing SQLAlchemy:
import sqlite3 import pandas as pd # Create your connection. cnx = sqlite3.connect('file.db') df = pd.read_sql_query("SELECT * FROM table_name", cnx)
As stated here, but you need to know the name of the used table in advance.
Solution 2:
The line
data = sqlite3.connect('data.db')
opens a connection to the database. There are no records queried up to this. So you have to execute a query afterward and provide this to the pandas DataFrame
constructor.
It should look similar to this
import sqlite3 import pandas as pd dat = sqlite3.connect('data.db') query = dat.execute("SELECT * From ") cols = [column[0] for column in query.description] results= pd.DataFrame.from_records(data = query.fetchall(), columns = cols)
I am not really firm with SQL commands, so you should check the correctness of the query. should be the name of the table in your database.
Solution 3:
Search sqlalchemy
, engine
and database name in google (sqlite in this case):
import pandas as pd import sqlalchemy db_name = "data.db" table_name = "LITTLE_BOBBY_TABLES" engine = sqlalchemy.create_engine("sqlite:///%s" % db_name, execution_options={"sqlite_raw_colnames": True}) df = pd.read_sql_table(table_name, engine)
Solution 4:
I wrote a piece of code up that saves tables in a database file such as .sqlite or .db and creates an excel file out of it with each table as a sheet or makes individual tables into csvs.
Note: You don’t need to know the table names in advance!
import os, fnmatch import sqlite3 import pandas as pd #creates a directory without throwing an error def create_dir(dir): if not os.path.exists(dir): os.makedirs(dir) print("Created Directory : ", dir) else: print("Directory already existed : ", dir) return dir #finds files in a directory corresponding to a regex query def find(pattern, path): result = [] for root, dirs, files in os.walk(path): for name in files: if fnmatch.fnmatch(name, pattern): result.append(os.path.join(root, name)) return result #convert sqlite databases(.db,.sqlite) to pandas dataframe(excel with each table as a different sheet or individual csv sheets) def save_db(dbpath=None,excel_path=None,csv_path=None,extension="*.sqlite",csvs=True,excels=True): if (excels==False and csvs==False): print("Atleast one of the parameters need to be true: csvs or excels") return -1 #little code to find files by extension if dbpath==None: files=find(extension,os.getcwd()) if len(files)>1: print("Multiple files found! Selecting the first one found!") print("To locate your file, set dbpath=") dbpath = find(extension,os.getcwd())[0] if dbpath==None else dbpath print("Reading database file from location :",dbpath) #path handling external_folder,base_name=os.path.split(os.path.abspath(dbpath)) file_name=os.path.splitext(base_name)[0] #firstname without . exten=os.path.splitext(base_name)[-1] #.file_extension internal_folder="Saved_Dataframes_"+file_name main_path=os.path.join(external_folder,internal_folder) create_dir(main_path) excel_path=os.path.join(main_path,"Excel_Multiple_Sheets.xlsx") if excel_path==None else excel_path csv_path=main_path if csv_path==None else csv_path db = sqlite3.connect(dbpath) cursor = db.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") tables = cursor.fetchall() print(len(tables),"Tables found :") if excels==True: #for writing to excel(xlsx) we will be needing this! try: import XlsxWriter except ModuleNotFoundError: !pip install XlsxWriter if (excels==True and csvs==True): writer = pd.ExcelWriter(excel_path, engine='xlsxwriter') i=0 for table_name in tables: table_name = table_name[0] table = pd.read_sql_query("SELECT * from %s" % table_name, db) i+=1 print("Parsing Excel Sheet ",i," : ",table_name) table.to_excel(writer, sheet_name=table_name, index=False) print("Parsing CSV File ",i," : ",table_name) table.to_csv(os.path.join(csv_path,table_name + '.csv'), index_label='index') writer.save() elif excels==True: writer = pd.ExcelWriter(excel_path, engine='xlsxwriter') i=0 for table_name in tables: table_name = table_name[0] table = pd.read_sql_query("SELECT * from %s" % table_name, db) i+=1 print("Parsing Excel Sheet ",i," : ",table_name) table.to_excel(writer, sheet_name=table_name, index=False) writer.save() elif csvs==True: i=0 for table_name in tables: table_name = table_name[0] table = pd.read_sql_query("SELECT * from %s" % table_name, db) i+=1 print("Parsing CSV File ",i," : ",table_name) table.to_csv(os.path.join(csv_path,table_name + '.csv'), index_label='index') cursor.close() db.close() return 0 save_db();
Solution 5:
i have stored my data in database.sqlite table name is Reviews
import sqlite3 con=sqlite3.connect("database.sqlite") data=pd.read_sql_query("SELECT * FROM Reviews",con) print(data)