diff --git a/src/main/java/org/wikitolearn/controllers/mediawiki/RevisionMediaWikiController.java b/src/main/java/org/wikitolearn/controllers/mediawiki/RevisionMediaWikiController.java index f9572aa..a0b5986 100644 --- a/src/main/java/org/wikitolearn/controllers/mediawiki/RevisionMediaWikiController.java +++ b/src/main/java/org/wikitolearn/controllers/mediawiki/RevisionMediaWikiController.java @@ -1,94 +1,93 @@ package org.wikitolearn.controllers.mediawiki; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import org.wikidata.wdtk.wikibaseapi.ApiConnection; -import org.wikitolearn.models.Page; import org.wikitolearn.models.Revision; import org.wikitolearn.utils.MediaWikiApiUtils; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.Map; /** - * This class will handle the query on mediawiki about revisions. + * This class will handle the query on MediaWiki about revisions. * Created by valsdav on 14/03/17. */ @Service public class RevisionMediaWikiController { private static final Logger LOG = LoggerFactory.getLogger(RevisionMediaWikiController.class); @Autowired private MediaWikiApiUtils mediaWikiApiUtils; @Autowired private ObjectMapper mapper; /** - * Get all the revisions for a specific page quering mediawiki api + * Get all the revisions for a specific page querying MediaWiki API * @param apiUrl the MediaWiki API url * @param pageid Pageid of the page of which getting the revisions. 
* * @return revisions A list that contains all the fetched revisions */ public List getAllRevisionForPage(String apiUrl, int pageid){ ApiConnection connection = mediaWikiApiUtils.getApiConnection(apiUrl); Map parameters = mediaWikiApiUtils.getRevisionParam(pageid); InputStream response; boolean moreRevs = true; JSONArray revsJson = new JSONArray(); List toBeConcat = new ArrayList<>(); List revs = new ArrayList<>(); try { while(moreRevs){ response = mediaWikiApiUtils.sendRequest(connection, "GET", parameters); JSONObject responseJson = mediaWikiApiUtils.streamToJson(response); toBeConcat.add(responseJson.getJSONObject("query").getJSONObject("pages"). getJSONObject(Integer.toString(pageid)).getJSONArray("revisions")); if(responseJson.has("continue")){ String continueFrom = responseJson.getJSONObject("continue").getString("rvcontinue"); parameters.put("rvcontinue", continueFrom); }else{ moreRevs = false; revsJson = concatArrays(toBeConcat); } } revs = mapper.readValue(revsJson.toString(), new TypeReference>(){}); return revs; } catch (JSONException e){ LOG.error("An error occurred while a JSONObject or JSONArray", e.getMessage()); } catch(IOException e){ LOG.error("An error occurred while converting an InputStream to JSONObject", e.getMessage()); } return revs; } /** * This method is an utility. It concatenates the given JSONArrays into one. 
* @param arrays The arrays to be concatenated * @return result The resulted JSONArray * @throws JSONException */ private JSONArray concatArrays(List arrays) throws JSONException{ JSONArray result = new JSONArray(); for (JSONArray arr : arrays) { for (int i = 0; i < arr.length(); i++) { result.put(arr.get(i)); } } return result; } } diff --git a/src/main/java/org/wikitolearn/controllers/mediawiki/UserMediaWikiController.java b/src/main/java/org/wikitolearn/controllers/mediawiki/UserMediaWikiController.java index ea1810e..95053cf 100644 --- a/src/main/java/org/wikitolearn/controllers/mediawiki/UserMediaWikiController.java +++ b/src/main/java/org/wikitolearn/controllers/mediawiki/UserMediaWikiController.java @@ -1,91 +1,90 @@ package org.wikitolearn.controllers.mediawiki; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import org.wikidata.wdtk.wikibaseapi.ApiConnection; -import org.wikitolearn.models.Page; import org.wikitolearn.models.User; import org.wikitolearn.utils.MediaWikiApiUtils; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.Map; /** - * Class used to query Mediawiki about Users + * Class used to query MediaWiki about Users * Created by valsdav on 14/03/17. */ @Service public class UserMediaWikiController { private static final Logger LOG = LoggerFactory.getLogger(UserMediaWikiController.class); @Autowired private MediaWikiApiUtils mediaWikiApiUtils; @Autowired private ObjectMapper mapper; /** * Get all the users from MediaWiki instance through its API. 
* @param apiUrl the MediaWiki API url * @return users A list that contains all the fetched users */ public List getAllUsers(String apiUrl){ ApiConnection connection = mediaWikiApiUtils.getApiConnection(apiUrl); Map parameters = mediaWikiApiUtils.getUserParam(); InputStream response; boolean moreUsers = true; JSONArray usersJson = new JSONArray(); List toBeConcat = new ArrayList<>(); List users = new ArrayList<>(); try { while(moreUsers){ response = mediaWikiApiUtils.sendRequest(connection, "GET", parameters); JSONObject responseJson = mediaWikiApiUtils.streamToJson(response); toBeConcat.add(responseJson.getJSONObject("query").getJSONArray("allusers")); if(responseJson.has("continue")){ String continueFrom = responseJson.getJSONObject("continue").getString("aufrom"); parameters.put("aufrom", continueFrom); }else{ moreUsers = false; usersJson = concatArrays(toBeConcat); } } users = mapper.readValue(usersJson.toString(), new TypeReference>(){}); return users; } catch (JSONException e){ LOG.error("An error occurred while a JSONObject or JSONArray", e.getMessage()); } catch(IOException e){ LOG.error("An error occurred while converting an InputStream to JSONObject", e.getMessage()); } return users; } /** * This method is an utility. It concatenates the given JSONArrays into one. 
* @param arrays The arrays to be concatenated * @return result The resulted JSONArray * @throws JSONException */ private JSONArray concatArrays(List arrays) throws JSONException{ JSONArray result = new JSONArray(); for (JSONArray arr : arrays) { for (int i = 0; i < arr.length(); i++) { result.put(arr.get(i)); } } return result; } } diff --git a/src/main/java/org/wikitolearn/dao/RevisionDAO.java b/src/main/java/org/wikitolearn/dao/RevisionDAO.java index e53cd65..3516d3b 100644 --- a/src/main/java/org/wikitolearn/dao/RevisionDAO.java +++ b/src/main/java/org/wikitolearn/dao/RevisionDAO.java @@ -1,142 +1,141 @@ package org.wikitolearn.dao; import com.orientechnologies.orient.core.metadata.schema.OClass; import com.orientechnologies.orient.core.metadata.schema.OType; import com.orientechnologies.orient.core.storage.ORecordDuplicatedException; import com.tinkerpop.blueprints.Vertex; import com.tinkerpop.blueprints.impls.orient.OrientGraph; import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx; import com.tinkerpop.blueprints.impls.orient.OrientVertexType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; import org.wikitolearn.models.Revision; -import org.wikitolearn.models.User; import org.wikitolearn.utils.DbConnection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; /** * This class will handle the Revision data processing on the OrientDB. * Created by valsdav on 14/03/17. */ @Repository public class RevisionDAO { private static final Logger LOG = LoggerFactory.getLogger(RevisionDAO.class); @Autowired private DbConnection connection; /** * This method is used to create the classes on the DB. * Moreover it creates a unique index on the userid property to avoid duplication. 
*/ public void createDBClass() { LOG.info("Creating DB classes for RevisionDAO..."); OrientGraphNoTx graph = connection.getDbGraphNT(); try{ - //Vertex type for the revision + // Vertex type for the revision OrientVertexType vertex = graph.createVertexType("Revision"); vertex.createProperty("revid", OType.INTEGER).setMandatory(true); vertex.createIndex("revid", OClass.INDEX_TYPE.UNIQUE, "revid"); - //Edge type for the created edge from User to Revision + // Edge type for the created edge from User to Revision graph.createEdgeType("Author"); - //Edge type to connect revision to parent revision + // Edge type to connect revision to parent revision graph.createEdgeType("ParentRevision"); - //Edge type to connect last revision to page vertex + // Edge type to connect last revision to page vertex graph.createEdgeType("LastRevision"); - //Edge type to connect the first revision of a page + // Edge type to connect the first revision of a page graph.createEdgeType("FirstRevision"); } catch( Exception e ) { LOG.error("Something went wrong during class creation. Operation will be rollbacked.", e.getMessage()); graph.rollback(); } finally { graph.shutdown(); } } /** * This method will insert the revisions of one page, creating the link ParentRevision between them and * the link FirstRevision and LastRevision with the Page vertex. Moreover it connects the Users to * the revisions they have created. - * This method must be used only for the firt INIT import, NOT for incremental inserction. + * This method must be used only for the first INIT import, NOT for incremental insertion. 
* @param pageid * @param revs * @return */ public Boolean insertRevisions(int pageid, List revs){ OrientGraph graph = connection.getGraph(); LOG.info("Starting to insert revisions..."); HashMap revsNodes = new HashMap(); Vertex firstRev = null; Vertex lastRev = null; try{ for(Revision rev : revs){ Map props = new HashMap<>(); props.put("revid", rev.getRevid()); props.put("length", rev.getLength()); props.put("changeCoefficient", rev.getChangeCoefficient()); props.put( "currentMeanVote", rev.getCurrentMeanVote()); props.put( "currentVotesReliability", rev.getCurrentVotesReliability()); props.put( "currentNormalizedVotesReliability", rev.getCurrentNormalisesVotesReliability()); props.put( "totalMeanVote", rev.getTotalMeanVote()); props.put( "totalVotesReliability", rev.getTotalVotesReliability()); props.put( "totalNormalizedVotesReliability", rev.getTotalNormalisesVotesReliability()); props.put("validated", rev.isValidated()); Vertex revNode = graph.addVertex("class:Revision", props); LOG.info("Revision inserted " + revNode.toString()); revsNodes.put(Integer.toString(rev.getRevid()), revNode); if (rev.getParentid() == 0){ firstRev = revNode; } if (lastRev==null || rev.getRevid() > (int) lastRev.getProperty("revid")){ lastRev = revNode; } - //connecting the creator of the revisions + // Connecting the creator of the revisions Vertex userCreator = null; try{ userCreator = graph.getVertices("User.userid", rev.getUserid()).iterator().next(); } catch (NoSuchElementException e){ //if the user is not found we link it to the Anonimous user. 
userCreator = graph.getVertices("User.userid", "0" ).iterator().next(); } graph.addEdge("class:Author", userCreator, revNode, "Author"); } - //now we have to create the the links between revisions + // Now we have to create the the links between revisions for (Revision r : revs){ if (r.getParentid() != 0){ graph.addEdge("class:ParentRevision", revsNodes.get(Integer.toString(r.getRevid())), revsNodes.get(Integer.toString(r.getParentid())), "ParentRevision"); } } - //now let's create the LastRevision and FirstRevision edges + // Now let's create the LastRevision and FirstRevision edges Vertex page = graph.getVertices("Page.pageid", pageid).iterator().next(); graph.addEdge("class:LastRevision", page, lastRev, "LastRevision"); graph.addEdge("class:FirstRevision", page, firstRev, "FirstRevision"); graph.commit(); LOG.info(String.format("Revisions of page %s insertion committed", pageid)); return true; } catch (ORecordDuplicatedException or) { LOG.error("Some of the pages are duplicates. Operation will be rollbacked.", or.getMessage()); graph.rollback(); } catch( Exception e ) { LOG.error("Something went wrong during user insertion. 
Operation will be rollbacked.", e.getMessage()); graph.rollback(); } finally { graph.shutdown(); } return false; } } diff --git a/src/main/java/org/wikitolearn/dao/UserDAO.java b/src/main/java/org/wikitolearn/dao/UserDAO.java index 84faae1..8b86ad9 100644 --- a/src/main/java/org/wikitolearn/dao/UserDAO.java +++ b/src/main/java/org/wikitolearn/dao/UserDAO.java @@ -1,92 +1,91 @@ /** * */ package org.wikitolearn.dao; import com.orientechnologies.orient.core.metadata.schema.OClass; import com.orientechnologies.orient.core.metadata.schema.OType; import com.orientechnologies.orient.core.storage.ORecordDuplicatedException; import com.tinkerpop.blueprints.Vertex; import com.tinkerpop.blueprints.impls.orient.OrientGraph; import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx; import com.tinkerpop.blueprints.impls.orient.OrientVertexType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; -import org.wikitolearn.models.Page; import org.wikitolearn.models.User; import org.wikitolearn.utils.DbConnection; import java.util.HashMap; import java.util.List; import java.util.Map; /** * @author alessandro * */ @Repository public class UserDAO { private static final Logger LOG = LoggerFactory.getLogger(UserDAO.class); @Autowired private DbConnection connection; /** * This method is used to create the class on the DB. * Moreover it creates a unique index on the userid property to avoid duplication. */ public void createDBClass() { LOG.info("Creating DB classes for RevisionDAO..."); OrientGraphNoTx graph = connection.getDbGraphNT(); try{ OrientVertexType vertex = graph.createVertexType("User"); vertex.createProperty("userid", OType.INTEGER).setMandatory(true); vertex.createIndex("userid", OClass.INDEX_TYPE.UNIQUE, "userid"); } catch( Exception e ) { LOG.error("Something went wrong during class creation. 
Operation will be rollbacked.", e.getMessage()); graph.rollback(); } finally { graph.shutdown(); } } /** * Insert all the given users in the database as vertexes. * If there are duplicates all the insertion is rolled back. * @param users List The pages to be inserted * @return boolean True if insertion was committed, false otherwise */ public Boolean insertUsers(List users){ OrientGraph graph = connection.getGraph(); LOG.info("Starting to insert users..."); try{ for(User p : users){ Map props = new HashMap<>(); props.put("userid", p.getUserid()); props.put( "username", p.getUsername()); props.put("votesReliability", p.getVotesReliability()); props.put("contributesReliability", p.getContributesReliability()); props.put("totalReliability", p.getTotalReliability()); Vertex userNode = graph.addVertex("class:User", props); LOG.info("User inserted " + userNode.toString()); } graph.commit(); LOG.info("Users insertion committed"); return true; } catch (ORecordDuplicatedException or) { LOG.error("Some of the pages are duplicates. Operation will be rollbacked.", or.getMessage()); graph.rollback(); } catch( Exception e ) { LOG.error("Something went wrong during user insertion. 
Operation will be rollbacked.", e.getMessage()); graph.rollback(); } finally { graph.shutdown(); } return false; } } diff --git a/src/main/java/org/wikitolearn/utils/DbConnection.java b/src/main/java/org/wikitolearn/utils/DbConnection.java index 0abff74..f14153d 100644 --- a/src/main/java/org/wikitolearn/utils/DbConnection.java +++ b/src/main/java/org/wikitolearn/utils/DbConnection.java @@ -1,55 +1,53 @@ /** * */ package org.wikitolearn.utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; import com.orientechnologies.orient.core.intent.OIntentMassiveInsert; import com.tinkerpop.blueprints.impls.orient.OrientGraph; import com.tinkerpop.blueprints.impls.orient.OrientGraphFactory; import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx; import org.wikitolearn.dao.UserDAO; -import sun.reflect.generics.reflectiveObjects.LazyReflectiveObjectGenerator; - /** * @author alessandro * */ @Service public class DbConnection { private static final Logger LOG = LoggerFactory.getLogger(UserDAO.class); private OrientGraphFactory factory; @Autowired public DbConnection(@Value("${DB_URL}") String dbUrl, @Value("${DB_USER}") String dbUser, @Value("${DB_PWD}") String dbPwd){ factory = new OrientGraphFactory(dbUrl, dbUser, dbPwd).setupPool(1, 20); } /** * This method will return the OreintDB graph instance after connection. 
* @return Transaction enabled OrientGraph object
*/
public OrientGraph getGraph() {
    LOG.info("Getting an instance of OrientDB....");
    final OrientGraph txGraph = factory.getTx();
    return txGraph;
}

/**
 * Returns an OrientDB graph instance without transaction support, meant for
 * massive inserts. The massive-insert intent is declared on the factory
 * before the graph is requested from it.
 *
 * @return Transaction disabled OrientGraph object
 */
public OrientGraphNoTx getDbGraphNT() {
    LOG.info("Getting a NoTX instance of OrientDB....");
    final OIntentMassiveInsert bulkIntent = new OIntentMassiveInsert();
    factory.declareIntent(bulkIntent);
    final OrientGraphNoTx plainGraph = factory.getNoTx();
    return plainGraph;
}
}