Wiki source for QueriesForSpeeches
=====Queries for Public Statements project=====
>>SpeechesGuide
[[UsefulResearchQueries Useful Research Queries]]
[[BreakTranscript Speech Error Queries]]
[[TagsManagement Tags Management]]
[[TagsQueries Tags Queries]]>>//The query below displays the total number of speeches in our database by candidates from each state listed by state.//
%%(language-ref)
SELECT c.state_id, COUNT(s.speech_id)
FROM speech s JOIN speech_candidate USING (speech_id) JOIN candidate c USING (candidate_id)
GROUP BY c.state_id
ORDER BY c.state_id
%%
~- Using the GROUP BY command to group the counts by state_id allows the computer to use the aggregate function COUNT in the Select clause.
//The query below displays the total number of speeches containing the word "deficit" given by candidates from each state listed by number of speeches in descending order.//
%%(language-ref)
SELECT c.state_id, COUNT(s.speech_id) speeches
FROM speech s JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
WHERE s.speechtext LIKE '%deficit%'
GROUP BY c.state_id
ORDER BY speeches DESC
%%
~- The % signs around the word "deficit" are wildcards (i.e. they hold the place of any number of characters on either end of that word). LIKE (instead of =) allows the wildcards to work properly.
~- ORDER BY automatically orders by the field requested in ascending order. To order in descending order, include the command DESC after the field.
//The query below displays the total number of speeches containing the word "deficit" for each state, in addition to the percentage of total speeches from each state that include the word "deficit" listed by ascending percentage.//
%%(language-ref)
SELECT a.state_id, a.deficit, b.total, (deficit*100.0)/total ratio
FROM
(SELECT c.state_id, COUNT(s.speech_id) deficit
FROM speech s JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
WHERE s.speechtext LIKE '%deficit%'
GROUP BY c.state_id) a
JOIN
(SELECT c.state_id, COUNT(s.speech_id) total
FROM speech s JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
GROUP BY c.state_id) b
USING (state_id)
ORDER BY ratio
%%
~- This query contains the two previous queries on this page as two subqueries (named "a" and "b").
~- Using symbols like * and / allow you to include mathematical operations within your selected fields, which is how the percentage is constructed in this query.
~- SQL will automatically round down to the nearest whole number UNLESS you multiply the equation (or field) by a number including at least one decimal place (i.e. 1.0 or 100.0). In this query, if "deficit" were not multiplied by 100.0, all the ratios would show up as 0.
//The query below displays the number of characters in each speech in our database.//
%%(language-ref)
SELECT speech_id, char_length (speechtext)
FROM speech
%%
~- "char_length" functions just like COUNT.
//The query below displays the total number of characters in all speeches from each state and the average number of characters in all speeches from each state.//
%%(language-ref)
SELECT c.state_id, SUM(cl), SUM(cl)/COUNT(speech_id) average
FROM
(SELECT speech_id, char_length (speechtext) cl
FROM speech) a
JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
GROUP BY c.state_id
ORDER BY average DESC
%%
~- This query contains the previous query on this page as a subquery (named "a") from which to pull the character lengths of each speech.
~- The SUM command adds the numbers in a certain field together. In this case, it adds all the character lengths of speeches in a given state, because we have grouped by state_id.
//The query below displays the tags used between two dates in descending order of occurrence://
%%(language-ref)
SELECT t.name, COUNT (t.name)
FROM tag t JOIN speech_tag st USING (tag_id) JOIN speech s USING (speech_id)
WHERE s.speechdate BETWEEN 'xxxx-xx-xx' AND 'xxxx-xx-xx'
GROUP BY t.name
ORDER BY COUNT DESC
%%
~- This query can easily be converted for categories by switching the tag tables for category tables.
//The query below displays the number of speeches given every day://
%%(language-ref)
SELECT s.speechdate, COUNT (s.speech_id)
FROM speech s
GROUP BY s.speechdate
ORDER BY s.speechdate DESC
%%
//The query below displays the average number of speeches per month counted by speechdate (you can substitute the "created" field here)://
%%(language-ref)
SELECT AVG (a.count) June, AVG (b.count) July, AVG (c.count) August
FROM
(SELECT s.speechdate, COUNT (s.speech_id)
FROM speech s
WHERE s.speechdate BETWEEN '2013-06-01' AND '2013-06-30'
GROUP BY s.speechdate) a,
(SELECT s.speechdate, COUNT (s.speech_id)
FROM speech s
WHERE s.speechdate BETWEEN '2013-07-01' AND '2013-07-31'
GROUP BY s.speechdate) b,
(SELECT s.speechdate, COUNT (s.speech_id)
FROM speech s
WHERE s.speechdate BETWEEN '2013-08-01' AND '2013-08-31'
GROUP BY s.speechdate) c
%%
//The query below displays the vocal activity of each state://
%%(language-ref)
SELECT a.state_id, A.X AS SpeechesPerState, B.Y AS OfficialsPerState, (a.x/b.y) AS VocalActivity
From
(SELECT c.state_id, COUNT(s.speech_id) as X
FROM speech s JOIN speech_candidate USING (speech_id) JOIN candidate c USING (candidate_id) JOIN office_candidate oc USING (candidate_id) JOIN office o USING (office_id)
WHERE o.officetype_id = 'C' AND oc.officecandidatestatus_id = 1
GROUP BY c.state_id
ORDER BY X) A
JOIN
(SELECT c.state_id, COUNT(c.candidate_id) as Y
FROM candidate c JOIN office_candidate oc USING (candidate_id) JOIN office o USING (office_id)
WHERE o.officetype_id = 'C' AND oc.officecandidatestatus_id = 1
GROUP BY c.state_id
ORDER BY Y) B
USING (state_id)
ORDER BY VocalActivity DESC
%%
//The query below displays the tag usage per day://
%%(language-ref)
SELECT a.speechdate as date, a.name as tags, a.Turbo as usage
FROM
(SELECT s.speechdate, t.name, COUNT (t.name) as Turbo
FROM tag t JOIN speech_tag st USING (tag_id) JOIN speech s USING (speech_id)
WHERE s.speechdate > '2006-01-01' AND s.speechdate < '2014-12-31'
GROUP BY s.speechdate, t.name) a
WHERE a.Turbo > 10
GROUP BY a.speechdate, a.name, a.Turbo
ORDER BY a.speechdate DESC, a.Turbo DESC, a.name
%%
**__Speech-Tag Queries__**
//See if a politician has any public statements with a particular tag//
%%(language-ref)
SELECT c.candidate_id, s.title, t.tag_id, s.speechdate, s.speech_id, speechtext
FROM speech s JOIN speech_tag t USING (speech_id) JOIN speech_candidate c USING (speech_id)
WHERE t.tag_id = '2825' and c.candidate_id='53270'
ORDER BY c.candidate_id, s.title, t.tag_id, s.speechdate, s.speech_id
%%
//See tags that have been used more than a certain number of times within a period of time//
%%(language-ref)
SELECT a.speechdate as date, a.name as tags, a.Turbo as usage
FROM
(SELECT s.speechdate, t.name, COUNT (t.name) as Turbo
FROM tag t JOIN speech_tag st USING (tag_id) JOIN speech s USING (speech_id)
WHERE s.speechdate > '2006-01-01' AND s.speechdate < '2015-12-31'
GROUP BY s.speechdate, t.name) a
WHERE a.Turbo > 10
GROUP BY a.speechdate, a.name, a.Turbo
ORDER BY a.speechdate DESC, a.Turbo DESC, a.name
%%
//See which speeches use multiple specific tags//
%%(language-ref)
SELECT x.speech_id, x.speechdate, x.title, COUNT(x.speech_id)
FROM
(SELECT s.speech_id, s.speechdate, s.title, t.tag_id, t.name
FROM speech s
JOIN speech_tag st
USING (speech_id)
JOIN tag t
ON (st.tag_id = t.tag_id)
WHERE t.tag_id = 254 OR t.tag_id = 88679) x
GROUP BY x.speech_id, x.speechdate, x.title
HAVING COUNT(x.speech_id) > 1
%%
~- Plug in the tag_id's of the tags you want to identify.
**__Multi-Candidate Data__**
//Public statements shared within specific group of people//
%%(language-ref)
SELECT a.speech_id, title, speechdate
FROM
(SELECT s.speech_id, Count (s.speech_id)
FROM speech s JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
WHERE c.candidate_id IN (15723, 1601, 156006, 117285, 120023, 135705, 1657, 17340, 27017, 111064 )
GROUP BY speech_id) a JOIN speech USING (speech_id)
WHERE a.count >1
ORDER BY speechdate desc
%%
//Public statements shared between a specific individual and a specific group of people
It's a very similar query to the one directly above. This one narrows down the results of the first query.//
%%(language-ref)
SELECT a.speech_id, title, speechdate
FROM
(SELECT s.speech_id, Count (s.speech_id)
FROM speech s JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
WHERE c.candidate_id IN (15723, 1601, 156006, 117285, 120023, 135705, 1657, 17340, 27017, 111064 )
GROUP BY speech_id) a
JOIN speech USING (speech_id)
JOIN speech_candidate
USING (speech_id)
WHERE a.count >1 AND candidate_id = 15723
ORDER BY speechdate desc
%%
SpeechesGuide
[[UsefulResearchQueries Useful Research Queries]]
[[BreakTranscript Speech Error Queries]]
>>SpeechesGuide
[[UsefulResearchQueries Useful Research Queries]]
[[BreakTranscript Speech Error Queries]]
[[TagsManagement Tags Management]]
[[TagsQueries Tags Queries]]>>//The query below displays the total number of speeches in our database by candidates from each state listed by state.//
%%(language-ref)
SELECT c.state_id, COUNT(s.speech_id)
FROM speech s JOIN speech_candidate USING (speech_id) JOIN candidate c USING (candidate_id)
GROUP BY c.state_id
ORDER BY c.state_id
%%
~- Using the GROUP BY command to group the counts by state_id allows the computer to use the aggregate function COUNT in the Select clause.
//The query below displays the total number of speeches containing the word "deficit" given by candidates from each state listed by number of speeches in descending order.//
%%(language-ref)
SELECT c.state_id, COUNT(s.speech_id) speeches
FROM speech s JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
WHERE s.speechtext LIKE '%deficit%'
GROUP BY c.state_id
ORDER BY speeches DESC
%%
~- The % signs around the word "deficit" are wildcards (i.e. they hold the place of any number of characters on either end of that word). LIKE (instead of =) allows the wildcards to work properly.
~- ORDER BY automatically orders by the field requested in ascending order. To order in descending order, include the command DESC after the field.
//The query below displays the total number of speeches containing the word "deficit" for each state, in addition to the percentage of total speeches from each state that include the word "deficit" listed by ascending percentage.//
%%(language-ref)
SELECT a.state_id, a.deficit, b.total, (deficit*100.0)/total ratio
FROM
(SELECT c.state_id, COUNT(s.speech_id) deficit
FROM speech s JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
WHERE s.speechtext LIKE '%deficit%'
GROUP BY c.state_id) a
JOIN
(SELECT c.state_id, COUNT(s.speech_id) total
FROM speech s JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
GROUP BY c.state_id) b
USING (state_id)
ORDER BY ratio
%%
~- This query contains the two previous queries on this page as two subqueries (named "a" and "b").
~- Using symbols like * and / allow you to include mathematical operations within your selected fields, which is how the percentage is constructed in this query.
~- SQL will automatically round down to the nearest whole number UNLESS you multiply the equation (or field) by a number including at least one decimal place (i.e. 1.0 or 100.0). In this query, if "deficit" were not multiplied by 100.0, all the ratios would show up as 0.
//The query below displays the number of characters in each speech in our database.//
%%(language-ref)
SELECT speech_id, char_length (speechtext)
FROM speech
%%
~- "char_length" functions just like COUNT.
//The query below displays the total number of characters in all speeches from each state and the average number of characters in all speeches from each state.//
%%(language-ref)
SELECT c.state_id, SUM(cl), SUM(cl)/COUNT(speech_id) average
FROM
(SELECT speech_id, char_length (speechtext) cl
FROM speech) a
JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
GROUP BY c.state_id
ORDER BY average DESC
%%
~- This query contains the previous query on this page as a subquery (named "a") from which to pull the character lengths of each speech.
~- The SUM command adds the numbers in a certain field together. In this case, it adds all the character lengths of speeches in a given state, because we have grouped by state_id.
//The query below displays the tags used between two dates in descending order of occurrence://
%%(language-ref)
SELECT t.name, COUNT (t.name)
FROM tag t JOIN speech_tag st USING (tag_id) JOIN speech s USING (speech_id)
WHERE s.speechdate BETWEEN 'xxxx-xx-xx' AND 'xxxx-xx-xx'
GROUP BY t.name
ORDER BY COUNT DESC
%%
~- This query can easily be converted for categories by switching the tag tables for category tables.
//The query below displays the number of speeches given every day://
%%(language-ref)
SELECT s.speechdate, COUNT (s.speech_id)
FROM speech s
GROUP BY s.speechdate
ORDER BY s.speechdate DESC
%%
//The query below displays the average number of speeches per month counted by speechdate (you can substitute the "created" field here)://
%%(language-ref)
SELECT AVG (a.count) June, AVG (b.count) July, AVG (c.count) August
FROM
(SELECT s.speechdate, COUNT (s.speech_id)
FROM speech s
WHERE s.speechdate BETWEEN '2013-06-01' AND '2013-06-30'
GROUP BY s.speechdate) a,
(SELECT s.speechdate, COUNT (s.speech_id)
FROM speech s
WHERE s.speechdate BETWEEN '2013-07-01' AND '2013-07-31'
GROUP BY s.speechdate) b,
(SELECT s.speechdate, COUNT (s.speech_id)
FROM speech s
WHERE s.speechdate BETWEEN '2013-08-01' AND '2013-08-31'
GROUP BY s.speechdate) c
%%
//The query below displays the vocal activity of each state://
%%(language-ref)
SELECT a.state_id, A.X AS SpeechesPerState, B.Y AS OfficialsPerState, (a.x/b.y) AS VocalActivity
From
(SELECT c.state_id, COUNT(s.speech_id) as X
FROM speech s JOIN speech_candidate USING (speech_id) JOIN candidate c USING (candidate_id) JOIN office_candidate oc USING (candidate_id) JOIN office o USING (office_id)
WHERE o.officetype_id = 'C' AND oc.officecandidatestatus_id = 1
GROUP BY c.state_id
ORDER BY X) A
JOIN
(SELECT c.state_id, COUNT(c.candidate_id) as Y
FROM candidate c JOIN office_candidate oc USING (candidate_id) JOIN office o USING (office_id)
WHERE o.officetype_id = 'C' AND oc.officecandidatestatus_id = 1
GROUP BY c.state_id
ORDER BY Y) B
USING (state_id)
ORDER BY VocalActivity DESC
%%
//The query below displays the tag usage per day://
%%(language-ref)
SELECT a.speechdate as date, a.name as tags, a.Turbo as usage
FROM
(SELECT s.speechdate, t.name, COUNT (t.name) as Turbo
FROM tag t JOIN speech_tag st USING (tag_id) JOIN speech s USING (speech_id)
WHERE s.speechdate > '2006-01-01' AND s.speechdate < '2014-12-31'
GROUP BY s.speechdate, t.name) a
WHERE a.Turbo > 10
GROUP BY a.speechdate, a.name, a.Turbo
ORDER BY a.speechdate DESC, a.Turbo DESC, a.name
%%
**__Speech-Tag Queries__**
//See if a politician has any public statements with a particular tag//
%%(language-ref)
SELECT c.candidate_id, s.title, t.tag_id, s.speechdate, s.speech_id, speechtext
FROM speech s JOIN speech_tag t USING (speech_id) JOIN speech_candidate c USING (speech_id)
WHERE t.tag_id = '2825' and c.candidate_id='53270'
ORDER BY c.candidate_id, s.title, t.tag_id, s.speechdate, s.speech_id
%%
//See tags that have been used more than a certain number of times within a period of time//
%%(language-ref)
SELECT a.speechdate as date, a.name as tags, a.Turbo as usage
FROM
(SELECT s.speechdate, t.name, COUNT (t.name) as Turbo
FROM tag t JOIN speech_tag st USING (tag_id) JOIN speech s USING (speech_id)
WHERE s.speechdate > '2006-01-01' AND s.speechdate < '2015-12-31'
GROUP BY s.speechdate, t.name) a
WHERE a.Turbo > 10
GROUP BY a.speechdate, a.name, a.Turbo
ORDER BY a.speechdate DESC, a.Turbo DESC, a.name
%%
//See which speeches use multiple specific tags//
%%(language-ref)
SELECT x.speech_id, x.speechdate, x.title, COUNT(x.speech_id)
FROM
(SELECT s.speech_id, s.speechdate, s.title, t.tag_id, t.name
FROM speech s
JOIN speech_tag st
USING (speech_id)
JOIN tag t
ON (st.tag_id = t.tag_id)
WHERE t.tag_id = 254 OR t.tag_id = 88679) x
GROUP BY x.speech_id, x.speechdate, x.title
HAVING COUNT(x.speech_id) > 1
%%
~- Plug in the tag_id's of the tags you want to identify.
**__Multi-Candidate Data__**
//Public statements shared within specific group of people//
%%(language-ref)
SELECT a.speech_id, title, speechdate
FROM
(SELECT s.speech_id, Count (s.speech_id)
FROM speech s JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
WHERE c.candidate_id IN (15723, 1601, 156006, 117285, 120023, 135705, 1657, 17340, 27017, 111064 )
GROUP BY speech_id) a JOIN speech USING (speech_id)
WHERE a.count >1
ORDER BY speechdate desc
%%
//Public statements shared between a specific individual and a specific group of people
It's a very similar query to the one directly above. This one narrows down the results of the first query.//
%%(language-ref)
SELECT a.speech_id, title, speechdate
FROM
(SELECT s.speech_id, Count (s.speech_id)
FROM speech s JOIN speech_candidate sc USING (speech_id) JOIN candidate c USING (candidate_id)
WHERE c.candidate_id IN (15723, 1601, 156006, 117285, 120023, 135705, 1657, 17340, 27017, 111064 )
GROUP BY speech_id) a
JOIN speech USING (speech_id)
JOIN speech_candidate
USING (speech_id)
WHERE a.count >1 AND candidate_id = 15723
ORDER BY speechdate desc
%%
SpeechesGuide
[[UsefulResearchQueries Useful Research Queries]]
[[BreakTranscript Speech Error Queries]]